
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:

   - add AEAD support to crypto engine

   - allow batch registration in simd

  Algorithms:

   - add CFB mode

   - add speck block cipher

   - add sm4 block cipher

   - new test case for crct10dif

   - improve scheduling latency on ARM

   - scatter/gather support to gcm in aesni

   - convert x86 crypto algorithms to skcipher

  Drivers:

   - hmac(sha224/sha256) support in inside-secure

   - aes gcm/ccm support in stm32

   - stm32mp1 support in stm32

   - ccree driver from staging tree

   - gcm support over QI in caam

   - add ks-sa hwrng driver"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (212 commits)
  crypto: ccree - remove unused enums
  crypto: ahash - Fix early termination in hash walk
  crypto: brcm - explicitly cast cipher to hash type
  crypto: talitos - don't leak pointers to authenc keys
  crypto: qat - don't leak pointers to authenc keys
  crypto: picoxcell - don't leak pointers to authenc keys
  crypto: ixp4xx - don't leak pointers to authenc keys
  crypto: chelsio - don't leak pointers to authenc keys
  crypto: caam/qi - don't leak pointers to authenc keys
  crypto: caam - don't leak pointers to authenc keys
  crypto: lrw - Free rctx->ext with kzfree
  crypto: talitos - fix IPsec cipher in length
  crypto: Deduplicate le32_to_cpu_array() and cpu_to_le32_array()
  crypto: doc - clarify hash callbacks state machine
  crypto: api - Keep failed instances alive
  crypto: api - Make crypto_alg_lookup static
  crypto: api - Remove unused crypto_type lookup function
  crypto: chelsio - Remove declaration of static function from header
  crypto: inside-secure - hmac(sha224) support
  crypto: inside-secure - hmac(sha256) support
  ..
Linus Torvalds, 7 years ago
parent
commit 9eb31227cb
100 files changed, 9658 additions and 8439 deletions
  1. Documentation/crypto/crypto_engine.rst: +48 -0
  2. Documentation/crypto/devel-algos.rst: +8 -0
  3. Documentation/devicetree/bindings/crypto/arm-cryptocell.txt: +2 -1
  4. Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt: +5 -1
  5. Documentation/devicetree/bindings/rng/imx-rng.txt: +5 -6
  6. Documentation/devicetree/bindings/rng/ks-sa-rng.txt: +21 -0
  7. Documentation/devicetree/bindings/rng/omap_rng.txt: +6 -1
  8. Documentation/devicetree/bindings/rng/st,stm32-rng.txt: +4 -0
  9. MAINTAINERS: +11 -4
  10. arch/arm/crypto/Kconfig: +6 -0
  11. arch/arm/crypto/Makefile: +4 -0
  12. arch/arm/crypto/aes-cipher-core.S: +10 -9
  13. arch/arm/crypto/speck-neon-core.S: +432 -0
  14. arch/arm/crypto/speck-neon-glue.c: +288 -0
  15. arch/arm64/crypto/Kconfig: +6 -0
  16. arch/arm64/crypto/Makefile: +5 -3
  17. arch/arm64/crypto/aes-ce-ccm-glue.c: +23 -24
  18. arch/arm64/crypto/aes-glue.c: +46 -49
  19. arch/arm64/crypto/aes-modes.S: +118 -237
  20. arch/arm64/crypto/aes-neonbs-glue.c: +22 -26
  21. arch/arm64/crypto/chacha20-neon-glue.c: +9 -3
  22. arch/arm64/crypto/sha256-glue.c: +23 -13
  23. arch/arm64/crypto/speck-neon-core.S: +352 -0
  24. arch/arm64/crypto/speck-neon-glue.c: +282 -0
  25. arch/x86/crypto/aesni-intel_asm.S: +693 -721
  26. arch/x86/crypto/aesni-intel_glue.c: +206 -24
  27. arch/x86/crypto/blowfish_glue.c: +113 -117
  28. arch/x86/crypto/camellia_aesni_avx2_glue.c: +99 -392
  29. arch/x86/crypto/camellia_aesni_avx_glue.c: +118 -377
  30. arch/x86/crypto/camellia_glue.c: +79 -277
  31. arch/x86/crypto/cast5_avx_glue.c: +127 -225
  32. arch/x86/crypto/cast6_avx_glue.c: +100 -389
  33. arch/x86/crypto/des3_ede_glue.c: +119 -119
  34. arch/x86/crypto/glue_helper.c: +125 -266
  35. arch/x86/crypto/serpent_avx2_glue.c: +97 -381
  36. arch/x86/crypto/serpent_avx_glue.c: +115 -403
  37. arch/x86/crypto/serpent_sse2_glue.c: +69 -450
  38. arch/x86/crypto/sha1-mb/sha1_mb.c: +3 -25
  39. arch/x86/crypto/sha1-mb/sha1_mb_ctx.h: +3 -5
  40. arch/x86/crypto/sha256-mb/sha256_mb.c: +3 -24
  41. arch/x86/crypto/sha256-mb/sha256_mb_ctx.h: +3 -5
  42. arch/x86/crypto/sha512-mb/sha512_mb.c: +3 -27
  43. arch/x86/crypto/sha512-mb/sha512_mb_ctx.h: +3 -5
  44. arch/x86/crypto/twofish_avx_glue.c: +121 -372
  45. arch/x86/crypto/twofish_glue_3way.c: +67 -272
  46. arch/x86/include/asm/crypto/camellia.h: +5 -11
  47. arch/x86/include/asm/crypto/glue_helper.h: +12 -63
  48. arch/x86/include/asm/crypto/serpent-avx.h: +5 -12
  49. arch/x86/include/asm/crypto/twofish.h: +0 -19
  50. crypto/Kconfig: +69 -60
  51. crypto/Makefile: +3 -1
  52. crypto/ablk_helper.c: +0 -150
  53. crypto/ahash.c: +6 -19
  54. crypto/algapi.c: +0 -8
  55. crypto/api.c: +16 -18
  56. crypto/cfb.c: +353 -0
  57. crypto/crypto_engine.c: +164 -137
  58. crypto/crypto_user.c: +1 -1
  59. crypto/ecc.c: +17 -6
  60. crypto/ecdh.c: +17 -6
  61. crypto/internal.h: +0 -1
  62. crypto/lrw.c: +40 -114
  63. crypto/mcryptd.c: +4 -30
  64. crypto/md4.c: +0 -17
  65. crypto/md5.c: +0 -17
  66. crypto/rsa-pkcs1pad.c: +1 -1
  67. crypto/simd.c: +50 -0
  68. crypto/sm4_generic.c: +244 -0
  69. crypto/speck.c: +307 -0
  70. crypto/tcrypt.c: +3 -0
  71. crypto/testmgr.c: +45 -0
  72. crypto/testmgr.h: +3321 -1445
  73. crypto/xts.c: +0 -72
  74. drivers/char/hw_random/Kconfig: +7 -0
  75. drivers/char/hw_random/Makefile: +1 -0
  76. drivers/char/hw_random/bcm2835-rng.c: +2 -0
  77. drivers/char/hw_random/cavium-rng-vf.c: +1 -1
  78. drivers/char/hw_random/cavium-rng.c: +1 -1
  79. drivers/char/hw_random/imx-rngc.c: +1 -1
  80. drivers/char/hw_random/ks-sa-rng.c: +257 -0
  81. drivers/char/hw_random/mxc-rnga.c: +14 -9
  82. drivers/char/hw_random/omap-rng.c: +18 -4
  83. drivers/char/hw_random/stm32-rng.c: +28 -16
  84. drivers/crypto/Kconfig: +27 -7
  85. drivers/crypto/Makefile: +1 -1
  86. drivers/crypto/atmel-aes.c: +2 -6
  87. drivers/crypto/atmel-sha.c: +2 -7
  88. drivers/crypto/atmel-tdes.c: +2 -7
  89. drivers/crypto/bcm/cipher.c: +2 -2
  90. drivers/crypto/bcm/util.c: +0 -1
  91. drivers/crypto/bfin_crc.c: +0 -743
  92. drivers/crypto/bfin_crc.h: +0 -124
  93. drivers/crypto/caam/caamalg.c: +15 -6
  94. drivers/crypto/caam/caamalg_desc.c: +152 -13
  95. drivers/crypto/caam/caamalg_desc.h: +18 -6
  96. drivers/crypto/caam/caamalg_qi.c: +387 -1
  97. drivers/crypto/caam/ctrl.c: +24 -18
  98. drivers/crypto/caam/qi.c: +9 -2
  99. drivers/crypto/cavium/cpt/cptpf_main.c: +1 -1
  100. drivers/crypto/ccp/ccp-crypto-aes-cmac.c: +1 -1

+ 48 - 0
Documentation/crypto/crypto_engine.rst

@@ -0,0 +1,48 @@
+=============
+CRYPTO ENGINE
+=============
+
+Overview
+--------
+The crypto engine (CE) API is a crypto queue manager.
+
+Requirement
+-----------
+You have to put the struct crypto_engine_ctx at the start of your tfm_ctx:
+struct your_tfm_ctx {
+        struct crypto_engine_ctx enginectx;
+        ...
+};
+Why: since CE manages only crypto_async_request, it cannot know the underlying
+request type and so has access only to the TFM.
+Using container_of to access __ctx is therefore impossible.
+Furthermore, the crypto engine cannot know the layout of "struct your_tfm_ctx",
+so it must assume that crypto_engine_ctx is at the start of it.
+
+Order of operations
+-------------------
+You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
+And start it via crypto_engine_start().
+
+Before transferring any request, you have to fill in the enginectx:
+- prepare_request: (a function pointer) called if you need to do some processing before handling the request
+- unprepare_request: (a function pointer) undoes what was done in prepare_request
+- do_one_request: (a function pointer) does the encryption for the current request
+
+Note that those three functions get the crypto_async_request associated with the received request,
+so you need to recover the original request via container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receives a crypto_request, you have to transfer it to
+the crypto engine via one of:
+- crypto_transfer_ablkcipher_request_to_engine()
+- crypto_transfer_aead_request_to_engine()
+- crypto_transfer_akcipher_request_to_engine()
+- crypto_transfer_hash_request_to_engine()
+- crypto_transfer_skcipher_request_to_engine()
+
+At the end of request processing, a call to one of the following functions is needed:
+- crypto_finalize_ablkcipher_request
+- crypto_finalize_aead_request
+- crypto_finalize_akcipher_request
+- crypto_finalize_hash_request
+- crypto_finalize_skcipher_request
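
To make the ordering above concrete, here is a minimal sketch of the driver side. Everything prefixed my_ is hypothetical, and the enginectx.op field layout and callback signatures are assumptions inferred from the description above, not quoted from this patch:

	#include <crypto/engine.h>
	#include <crypto/internal/skcipher.h>

	struct my_tfm_ctx {
		struct crypto_engine_ctx enginectx;	/* must be the first member */
		/* driver-private key material, hardware handles, ... */
	};

	/* Assumed callback signature: int (*)(struct crypto_engine *, void *) */
	static int my_do_one(struct crypto_engine *engine, void *areq)
	{
		struct skcipher_request *req =
			container_of(areq, struct skcipher_request, base);

		/* program the hardware for this request; once it completes: */
		crypto_finalize_skcipher_request(engine, req, 0);
		return 0;
	}

	static int my_init_tfm(struct crypto_skcipher *tfm)
	{
		struct my_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);

		ctx->enginectx.op.do_one_request = my_do_one;
		ctx->enginectx.op.prepare_request = NULL;	/* optional */
		ctx->enginectx.op.unprepare_request = NULL;	/* optional */
		return 0;
	}

At probe time the driver would call crypto_engine_alloc_init() and crypto_engine_start(), and its .encrypt()/.decrypt() handlers would simply return crypto_transfer_skcipher_request_to_engine(engine, req).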

+ 8 - 0
Documentation/crypto/devel-algos.rst

@@ -236,6 +236,14 @@ when used from another part of the kernel.
                                |
                                '---------------> HASH2
 
+Note that it is perfectly legal to "abandon" a request object:
+- call .init() and then .update() (any number of times)
+- _not_ call any of .final(), .finup() or .export() at any point in the future
+
+In other words, implementations should mind resource allocation and clean-up.
+No resources related to request objects should remain allocated after a call
+to .init() or .update(), since there might be no chance to free them.
+
 
 Specifics Of Asynchronous HASH Transformation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
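
To make the rule concrete, here is a toy sketch (not from this patch; the byte-counting "hash", its names and structures are invented, and registration plus crypto_ahash_set_reqsize() are omitted) in which all per-request state lives in the API-allocated request context, so an abandoned request leaves nothing to free:

	#include <crypto/internal/hash.h>
	#include <linux/string.h>

	struct count_req_ctx {
		u64 count;			/* all per-request state lives here */
	};

	static int count_init(struct ahash_request *req)
	{
		struct count_req_ctx *rctx = ahash_request_ctx(req);

		rctx->count = 0;
		return 0;
	}

	static int count_update(struct ahash_request *req)
	{
		struct count_req_ctx *rctx = ahash_request_ctx(req);

		/*
		 * Nothing is allocated here, so if the caller never invokes
		 * .final(), .finup() or .export(), nothing is leaked.
		 */
		rctx->count += req->nbytes;
		return 0;
	}

	static int count_final(struct ahash_request *req)
	{
		struct count_req_ctx *rctx = ahash_request_ctx(req);

		memcpy(req->result, &rctx->count, sizeof(rctx->count));
		return 0;
	}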

+ 2 - 1
Documentation/devicetree/bindings/crypto/arm-cryptocell.txt

@@ -1,7 +1,8 @@
 Arm TrustZone CryptoCell cryptographic engine
 
 Required properties:
-- compatible: Should be "arm,cryptocell-712-ree".
+- compatible: Should be one of: "arm,cryptocell-712-ree",
+  "arm,cryptocell-710-ree" or "arm,cryptocell-630p-ree".
 - reg: Base physical address of the engine and length of memory mapped region.
 - interrupts: Interrupt number for the device.
 

+ 5 - 1
Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt

@@ -8,7 +8,11 @@ Required properties:
 - interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
 
 Optional properties:
-- clocks: Reference to the crypto engine clock.
+- clocks: Reference to the crypto engine clocks; the second clock is
+          needed for the Armada 7K/8K SoCs.
+- clock-names: mandatory if there is a second clock, in this case the
+               name must be "core" for the first clock and "reg" for
+               the second one.
 
 Example:
 

+ 5 - 6
Documentation/devicetree/bindings/rng/imx-rngc.txt → Documentation/devicetree/bindings/rng/imx-rng.txt

@@ -1,15 +1,14 @@
-Freescale RNGC (Random Number Generator Version C)
-
-The driver also supports version B, which is mostly compatible
-to version C.
+Freescale RNGA/RNGB/RNGC (Random Number Generator Versions A, B and C)
 
 Required properties:
 - compatible : should be one of
+               "fsl,imx21-rnga"
+               "fsl,imx31-rnga" (backward compatible with "fsl,imx21-rnga")
                "fsl,imx25-rngb"
                "fsl,imx35-rngc"
 - reg : offset and length of the register set of this block
-- interrupts : the interrupt number for the RNGC block
-- clocks : the RNGC clk source
+- interrupts : the interrupt number for the RNG block
+- clocks : the RNG clk source
 
 Example:
 

+ 21 - 0
Documentation/devicetree/bindings/rng/ks-sa-rng.txt

@@ -0,0 +1,21 @@
+Keystone SoC Hardware Random Number Generator (HWRNG) Module
+
+On Keystone SoCs, the HWRNG module is a submodule of the Security Accelerator.
+
+- compatible: should be "ti,keystone-rng"
+- ti,syscon-sa-cfg: phandle to syscon node of the SA configuration registers.
+		    These registers are shared between hwrng and crypto drivers.
+- clocks: phandle to the reference clocks for the subsystem
+- clock-names: functional clock name. Should be set to "fck"
+- reg: HWRNG module register space
+
+Example:
+/* K2HK */
+
+rng@24000 {
+	compatible = "ti,keystone-rng";
+	ti,syscon-sa-cfg = <&sa_config>;
+	clocks = <&clksa>;
+	clock-names = "fck";
+	reg = <0x24000 0x1000>;
+};

+ 6 - 1
Documentation/devicetree/bindings/rng/omap_rng.txt

@@ -13,7 +13,12 @@ Required properties:
 - interrupts : the interrupt number for the RNG module.
 		Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
 - clocks: the trng clock source. Only mandatory for the
-  "inside-secure,safexcel-eip76" compatible.
+  "inside-secure,safexcel-eip76" compatible; the second clock is
+  needed for the Armada 7K/8K SoCs
+- clock-names: mandatory if there is a second clock, in this case the
+  name must be "core" for the first clock and "reg" for the second
+  one
+
 
 Example:
 /* AM335x */

+ 4 - 0
Documentation/devicetree/bindings/rng/st,stm32-rng.txt

@@ -11,6 +11,10 @@ Required properties:
 - interrupts : The designated IRQ line for the RNG
 - clocks : The clock needed to enable the RNG
 
+Optional properties:
+- resets : The reset to properly start RNG
+- clock-error-detect : Enable the clock detection management
+
 Example:
 
 	rng: rng@50060800 {

+ 11 - 4
MAINTAINERS

@@ -3252,12 +3252,11 @@ F:	drivers/net/ieee802154/cc2520.c
 F:	include/linux/spi/cc2520.h
 F:	Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
 
-CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER
+CCREE ARM TRUSTZONE CRYPTOCELL REE DRIVER
 M:	Gilad Ben-Yossef <gilad@benyossef.com>
 L:	linux-crypto@vger.kernel.org
-L:	driverdev-devel@linuxdriverproject.org
 S:	Supported
-F:	drivers/staging/ccree/
+F:	drivers/crypto/ccree/
 W:	https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
 
 CEC FRAMEWORK
@@ -6962,7 +6961,7 @@ F:	drivers/input/input-mt.c
 K:	\b(ABS|SYN)_MT_
 
 INSIDE SECURE CRYPTO DRIVER
-M:	Antoine Tenart <antoine.tenart@free-electrons.com>
+M:	Antoine Tenart <antoine.tenart@bootlin.com>
 F:	drivers/crypto/inside-secure/
 S:	Maintained
 L:	linux-crypto@vger.kernel.org
@@ -7200,6 +7199,14 @@ L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/i40iw/
 
+INTEL SHA MULTIBUFFER DRIVER
+M:	Megha Dey <megha.dey@linux.intel.com>
+R:	Tim Chen <tim.c.chen@linux.intel.com>
+L:	linux-crypto@vger.kernel.org
+S:	Supported
+F:	arch/x86/crypto/sha*-mb
+F:	crypto/mcryptd.c
+
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org

+ 6 - 0
arch/arm/crypto/Kconfig

@@ -121,4 +121,10 @@ config CRYPTO_CHACHA20_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20
 
+config CRYPTO_SPECK_NEON
+	tristate "NEON accelerated Speck cipher algorithms"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SPECK
+
 endif

+ 4 - 0
arch/arm/crypto/Makefile

@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -53,7 +54,9 @@ ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+speck-neon-y := speck-neon-core.o speck-neon-glue.o
 
+ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
@@ -62,5 +65,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
 	$(call cmd,perl)
+endif
 
 .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S

+ 10 - 9
arch/arm/crypto/aes-cipher-core.S

@@ -174,6 +174,16 @@
 	.ltorg
 	.endm
 
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ENDPROC(__aes_arm_encrypt)
+
+	.align		5
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
+ENDPROC(__aes_arm_decrypt)
+
+	.section	".rodata", "a"
 	.align		L1_CACHE_SHIFT
 	.type		__aes_arm_inverse_sbox, %object
 __aes_arm_inverse_sbox:
@@ -210,12 +220,3 @@ __aes_arm_inverse_sbox:
 	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
-
-ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-ENDPROC(__aes_arm_encrypt)
-
-	.align		5
-ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
-ENDPROC(__aes_arm_decrypt)

+ 432 - 0
arch/arm/crypto/speck-neon-core.S

@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+
+	// arguments
+	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
+	NROUNDS		.req	r1	// int nrounds
+	DST		.req	r2	// void *dst
+	SRC		.req	r3	// const void *src
+	NBYTES		.req	r4	// unsigned int nbytes
+	TWEAK		.req	r5	// void *tweak
+
+	// registers which hold the data being encrypted/decrypted
+	X0		.req	q0
+	X0_L		.req	d0
+	X0_H		.req	d1
+	Y0		.req	q1
+	Y0_H		.req	d3
+	X1		.req	q2
+	X1_L		.req	d4
+	X1_H		.req	d5
+	Y1		.req	q3
+	Y1_H		.req	d7
+	X2		.req	q4
+	X2_L		.req	d8
+	X2_H		.req	d9
+	Y2		.req	q5
+	Y2_H		.req	d11
+	X3		.req	q6
+	X3_L		.req	d12
+	X3_H		.req	d13
+	Y3		.req	q7
+	Y3_H		.req	d15
+
+	// the round key, duplicated in all lanes
+	ROUND_KEY	.req	q8
+	ROUND_KEY_L	.req	d16
+	ROUND_KEY_H	.req	d17
+
+	// index vector for vtbl-based 8-bit rotates
+	ROTATE_TABLE	.req	d18
+
+	// multiplication table for updating XTS tweaks
+	GF128MUL_TABLE	.req	d19
+	GF64MUL_TABLE	.req	d19
+
+	// current XTS tweak value(s)
+	TWEAKV		.req	q10
+	TWEAKV_L	.req	d20
+	TWEAKV_H	.req	d21
+
+	TMP0		.req	q12
+	TMP0_L		.req	d24
+	TMP0_H		.req	d25
+	TMP1		.req	q13
+	TMP2		.req	q14
+	TMP3		.req	q15
+
+	.align		4
+.Lror64_8_table:
+	.byte		1, 2, 3, 4, 5, 6, 7, 0
+.Lror32_8_table:
+	.byte		1, 2, 3, 0, 5, 6, 7, 4
+.Lrol64_8_table:
+	.byte		7, 0, 1, 2, 3, 4, 5, 6
+.Lrol32_8_table:
+	.byte		3, 0, 1, 2, 7, 4, 5, 6
+.Lgf128mul_table:
+	.byte		0, 0x87
+	.fill		14
+.Lgf64mul_table:
+	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
+	.fill		12
+
+/*
+ * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
+ *
+ * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
+ * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
+ * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
+ *
+ * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
+ * the vtbl approach is faster on some processors and the same speed on others.
+ */
+.macro _speck_round_128bytes	n
+
+	// x = ror(x, 8)
+	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
+	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
+	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
+	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
+	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
+	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
+	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
+	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
+
+	// x += y
+	vadd.u\n	X0, Y0
+	vadd.u\n	X1, Y1
+	vadd.u\n	X2, Y2
+	vadd.u\n	X3, Y3
+
+	// x ^= k
+	veor		X0, ROUND_KEY
+	veor		X1, ROUND_KEY
+	veor		X2, ROUND_KEY
+	veor		X3, ROUND_KEY
+
+	// y = rol(y, 3)
+	vshl.u\n	TMP0, Y0, #3
+	vshl.u\n	TMP1, Y1, #3
+	vshl.u\n	TMP2, Y2, #3
+	vshl.u\n	TMP3, Y3, #3
+	vsri.u\n	TMP0, Y0, #(\n - 3)
+	vsri.u\n	TMP1, Y1, #(\n - 3)
+	vsri.u\n	TMP2, Y2, #(\n - 3)
+	vsri.u\n	TMP3, Y3, #(\n - 3)
+
+	// y ^= x
+	veor		Y0, TMP0, X0
+	veor		Y1, TMP1, X1
+	veor		Y2, TMP2, X2
+	veor		Y3, TMP3, X3
+.endm
+
+/*
+ * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
+ *
+ * This is the inverse of _speck_round_128bytes().
+ */
+.macro _speck_unround_128bytes	n
+
+	// y ^= x
+	veor		TMP0, Y0, X0
+	veor		TMP1, Y1, X1
+	veor		TMP2, Y2, X2
+	veor		TMP3, Y3, X3
+
+	// y = ror(y, 3)
+	vshr.u\n	Y0, TMP0, #3
+	vshr.u\n	Y1, TMP1, #3
+	vshr.u\n	Y2, TMP2, #3
+	vshr.u\n	Y3, TMP3, #3
+	vsli.u\n	Y0, TMP0, #(\n - 3)
+	vsli.u\n	Y1, TMP1, #(\n - 3)
+	vsli.u\n	Y2, TMP2, #(\n - 3)
+	vsli.u\n	Y3, TMP3, #(\n - 3)
+
+	// x ^= k
+	veor		X0, ROUND_KEY
+	veor		X1, ROUND_KEY
+	veor		X2, ROUND_KEY
+	veor		X3, ROUND_KEY
+
+	// x -= y
+	vsub.u\n	X0, Y0
+	vsub.u\n	X1, Y1
+	vsub.u\n	X2, Y2
+	vsub.u\n	X3, Y3
+
+	// x = rol(x, 8);
+	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
+	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
+	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
+	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
+	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
+	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
+	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
+	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
+.endm
+
+.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
+
+	// Load the next source block
+	vld1.8		{\dst_reg}, [SRC]!
+
+	// Save the current tweak in the tweak buffer
+	vst1.8		{TWEAKV}, [\tweak_buf:128]!
+
+	// XOR the next source block with the current tweak
+	veor		\dst_reg, TWEAKV
+
+	/*
+	 * Calculate the next tweak by multiplying the current one by x,
+	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
+	 */
+	vshr.u64	\tmp, TWEAKV, #63
+	vshl.u64	TWEAKV, #1
+	veor		TWEAKV_H, \tmp\()_L
+	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
+	veor		TWEAKV_L, \tmp\()_H
+.endm
+
+.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
+
+	// Load the next two source blocks
+	vld1.8		{\dst_reg}, [SRC]!
+
+	// Save the current two tweaks in the tweak buffer
+	vst1.8		{TWEAKV}, [\tweak_buf:128]!
+
+	// XOR the next two source blocks with the current two tweaks
+	veor		\dst_reg, TWEAKV
+
+	/*
+	 * Calculate the next two tweaks by multiplying the current ones by x^2,
+	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
+	 */
+	vshr.u64	\tmp, TWEAKV, #62
+	vshl.u64	TWEAKV, #2
+	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
+	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
+	veor		TWEAKV, \tmp
+.endm
+
+/*
+ * _speck_xts_crypt() - Speck-XTS encryption/decryption
+ *
+ * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
+ * using Speck-XTS, specifically the variant with a block size of '2n' and round
+ * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
+ * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
+ * nonzero multiple of 128.
+ */
+.macro _speck_xts_crypt	n, decrypting
+	push		{r4-r7}
+	mov		r7, sp
+
+	/*
+	 * The first four parameters were passed in registers r0-r3.  Load the
+	 * additional parameters, which were passed on the stack.
+	 */
+	ldr		NBYTES, [sp, #16]
+	ldr		TWEAK, [sp, #20]
+
+	/*
+	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
+	 * round key rather than the first, since for decryption the round keys
+	 * are used in reverse order.
+	 */
+.if \decrypting
+.if \n == 64
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
+	sub		ROUND_KEYS, #8
+.else
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
+	sub		ROUND_KEYS, #4
+.endif
+.endif
+
+	// Load the index vector for vtbl-based 8-bit rotates
+.if \decrypting
+	ldr		r12, =.Lrol\n\()_8_table
+.else
+	ldr		r12, =.Lror\n\()_8_table
+.endif
+	vld1.8		{ROTATE_TABLE}, [r12:64]
+
+	// One-time XTS preparation
+
+	/*
+	 * Allocate stack space to store 128 bytes worth of tweaks.  For
+	 * performance, this space is aligned to a 16-byte boundary so that we
+	 * can use the load/store instructions that declare 16-byte alignment.
+	 */
+	sub		sp, #128
+	bic		sp, #0xf
+
+.if \n == 64
+	// Load first tweak
+	vld1.8		{TWEAKV}, [TWEAK]
+
+	// Load GF(2^128) multiplication table
+	ldr		r12, =.Lgf128mul_table
+	vld1.8		{GF128MUL_TABLE}, [r12:64]
+.else
+	// Load first tweak
+	vld1.8		{TWEAKV_L}, [TWEAK]
+
+	// Load GF(2^64) multiplication table
+	ldr		r12, =.Lgf64mul_table
+	vld1.8		{GF64MUL_TABLE}, [r12:64]
+
+	// Calculate second tweak, packing it together with the first
+	vshr.u64	TMP0_L, TWEAKV_L, #63
+	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
+	vshl.u64	TWEAKV_H, TWEAKV_L, #1
+	veor		TWEAKV_H, TMP0_L
+.endif
+
+.Lnext_128bytes_\@:
+
+	/*
+	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
+	 * values, and save the tweaks on the stack for later.  Then
+	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
+	 * that the X[0-3] registers contain only the second halves of blocks,
+	 * and the Y[0-3] registers contain only the first halves of blocks.
+	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
+	 */
+	mov		r12, sp
+.if \n == 64
+	_xts128_precrypt_one	X0, r12, TMP0
+	_xts128_precrypt_one	Y0, r12, TMP0
+	_xts128_precrypt_one	X1, r12, TMP0
+	_xts128_precrypt_one	Y1, r12, TMP0
+	_xts128_precrypt_one	X2, r12, TMP0
+	_xts128_precrypt_one	Y2, r12, TMP0
+	_xts128_precrypt_one	X3, r12, TMP0
+	_xts128_precrypt_one	Y3, r12, TMP0
+	vswp		X0_L, Y0_H
+	vswp		X1_L, Y1_H
+	vswp		X2_L, Y2_H
+	vswp		X3_L, Y3_H
+.else
+	_xts64_precrypt_two	X0, r12, TMP0
+	_xts64_precrypt_two	Y0, r12, TMP0
+	_xts64_precrypt_two	X1, r12, TMP0
+	_xts64_precrypt_two	Y1, r12, TMP0
+	_xts64_precrypt_two	X2, r12, TMP0
+	_xts64_precrypt_two	Y2, r12, TMP0
+	_xts64_precrypt_two	X3, r12, TMP0
+	_xts64_precrypt_two	Y3, r12, TMP0
+	vuzp.32		Y0, X0
+	vuzp.32		Y1, X1
+	vuzp.32		Y2, X2
+	vuzp.32		Y3, X3
+.endif
+
+	// Do the cipher rounds
+
+	mov		r12, ROUND_KEYS
+	mov		r6, NROUNDS
+
+.Lnext_round_\@:
+.if \decrypting
+.if \n == 64
+	vld1.64		ROUND_KEY_L, [r12]
+	sub		r12, #8
+	vmov		ROUND_KEY_H, ROUND_KEY_L
+.else
+	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
+	sub		r12, #4
+.endif
+	_speck_unround_128bytes	\n
+.else
+.if \n == 64
+	vld1.64		ROUND_KEY_L, [r12]!
+	vmov		ROUND_KEY_H, ROUND_KEY_L
+.else
+	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
+.endif
+	_speck_round_128bytes	\n
+.endif
+	subs		r6, r6, #1
+	bne		.Lnext_round_\@
+
+	// Re-interleave the 'x' and 'y' elements of each block
+.if \n == 64
+	vswp		X0_L, Y0_H
+	vswp		X1_L, Y1_H
+	vswp		X2_L, Y2_H
+	vswp		X3_L, Y3_H
+.else
+	vzip.32		Y0, X0
+	vzip.32		Y1, X1
+	vzip.32		Y2, X2
+	vzip.32		Y3, X3
+.endif
+
+	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
+	mov		r12, sp
+	vld1.8		{TMP0, TMP1}, [r12:128]!
+	vld1.8		{TMP2, TMP3}, [r12:128]!
+	veor		X0, TMP0
+	veor		Y0, TMP1
+	veor		X1, TMP2
+	veor		Y1, TMP3
+	vld1.8		{TMP0, TMP1}, [r12:128]!
+	vld1.8		{TMP2, TMP3}, [r12:128]!
+	veor		X2, TMP0
+	veor		Y2, TMP1
+	veor		X3, TMP2
+	veor		Y3, TMP3
+
+	// Store the ciphertext in the destination buffer
+	vst1.8		{X0, Y0}, [DST]!
+	vst1.8		{X1, Y1}, [DST]!
+	vst1.8		{X2, Y2}, [DST]!
+	vst1.8		{X3, Y3}, [DST]!
+
+	// Continue if there are more 128-byte chunks remaining, else return
+	subs		NBYTES, #128
+	bne		.Lnext_128bytes_\@
+
+	// Store the next tweak
+.if \n == 64
+	vst1.8		{TWEAKV}, [TWEAK]
+.else
+	vst1.8		{TWEAKV_L}, [TWEAK]
+.endif
+
+	mov		sp, r7
+	pop		{r4-r7}
+	bx		lr
+.endm
+
+ENTRY(speck128_xts_encrypt_neon)
+	_speck_xts_crypt	n=64, decrypting=0
+ENDPROC(speck128_xts_encrypt_neon)
+
+ENTRY(speck128_xts_decrypt_neon)
+	_speck_xts_crypt	n=64, decrypting=1
+ENDPROC(speck128_xts_decrypt_neon)
+
+ENTRY(speck64_xts_encrypt_neon)
+	_speck_xts_crypt	n=32, decrypting=0
+ENDPROC(speck64_xts_encrypt_neon)
+
+ENTRY(speck64_xts_decrypt_neon)
+	_speck_xts_crypt	n=32, decrypting=1
+ENDPROC(speck64_xts_decrypt_neon)
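
For reference, the round function that the NEON macros above vectorize is tiny. A scalar C sketch of one Speck128 round and its inverse, matching the ror/add/xor/rol sequence described in the comments (it presumably mirrors the generic implementation added in crypto/speck.c, but is written out here only as an illustration):

	#include <linux/bitops.h>	/* rol64(), ror64() */

	/* One Speck128 encryption round on the (x, y) block halves. */
	static inline void speck128_round(u64 *x, u64 *y, u64 k)
	{
		*x = ror64(*x, 8);
		*x += *y;
		*x ^= k;
		*y = rol64(*y, 3);
		*y ^= *x;
	}

	/* Its inverse, used for decryption. */
	static inline void speck128_unround(u64 *x, u64 *y, u64 k)
	{
		*y ^= *x;
		*y = ror64(*y, 3);
		*x ^= k;
		*x -= *y;
		*x = rol64(*x, 8);
	}

For Speck64 the same sequence is applied to u32 halves with rol32()/ror32().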

+ 288 - 0
arch/arm/crypto/speck-neon-glue.c

@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
+ * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
+ * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
+ * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
+ * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
+ * OCB and PMAC"), represented as 0x1B.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/speck.h>
+#include <crypto/xts.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* The assembly functions only handle multiples of 128 bytes */
+#define SPECK_NEON_CHUNK_SIZE	128
+
+/* Speck128 */
+
+struct speck128_xts_tfm_ctx {
+	struct speck128_tfm_ctx main_key;
+	struct speck128_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
+				     u8 *, const u8 *);
+typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
+					  const void *, unsigned int, void *);
+
+static __always_inline int
+__speck128_xts_crypt(struct skcipher_request *req,
+		     speck128_crypt_one_t crypt_one,
+		     speck128_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	le128 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
+			gf128mul_x_ble(&tweak, &tweak);
+
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck128_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
+				    speck128_xts_encrypt_neon);
+}
+
+static int speck128_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
+				    speck128_xts_decrypt_neon);
+}
+
+static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+/* Speck64 */
+
+struct speck64_xts_tfm_ctx {
+	struct speck64_tfm_ctx main_key;
+	struct speck64_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
+				    u8 *, const u8 *);
+typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
+					 const void *, unsigned int, void *);
+
+static __always_inline int
+__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
+		    speck64_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	__le64 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			*(__le64 *)dst = *(__le64 *)src ^ tweak;
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			*(__le64 *)dst ^= tweak;
+			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
+					    ((tweak & cpu_to_le64(1ULL << 63)) ?
+					     0x1B : 0));
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck64_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
+				   speck64_xts_encrypt_neon);
+}
+
+static int speck64_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
+				   speck64_xts_decrypt_neon);
+}
+
+static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+static struct skcipher_alg speck_algs[] = {
+	{
+		.base.cra_name		= "xts(speck128)",
+		.base.cra_driver_name	= "xts-speck128-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
+		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
+		.ivsize			= SPECK128_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck128_xts_setkey,
+		.encrypt		= speck128_xts_encrypt,
+		.decrypt		= speck128_xts_decrypt,
+	}, {
+		.base.cra_name		= "xts(speck64)",
+		.base.cra_driver_name	= "xts-speck64-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
+		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
+		.ivsize			= SPECK64_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck64_xts_setkey,
+		.encrypt		= speck64_xts_encrypt,
+		.decrypt		= speck64_xts_decrypt,
+	}
+};
+
+static int __init speck_neon_module_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_neon_module_exit(void)
+{
+	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_neon_module_init);
+module_exit(speck_neon_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("xts(speck128)");
+MODULE_ALIAS_CRYPTO("xts-speck128-neon");
+MODULE_ALIAS_CRYPTO("xts(speck64)");
+MODULE_ALIAS_CRYPTO("xts-speck64-neon");
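
Once the module is loaded, these algorithms are reached through the regular skcipher API. A hedged usage sketch follows: the helper name is made up, the key length for XTS must be twice the Speck128 key size, the buffer is assumed to be a linear kernel buffer, and since this implementation is a synchronous skcipher the encrypt call is assumed to complete inline:

	#include <crypto/skcipher.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>
	#include <linux/slab.h>

	static int speck_xts_encrypt_buf(u8 *buf, unsigned int len,
					 const u8 *key, unsigned int keylen,
					 u8 iv[16])
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		int err;

		tfm = crypto_alloc_skcipher("xts(speck128)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err)
			goto out_free_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_free_tfm;
		}

		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		err = crypto_skcipher_encrypt(req);

		skcipher_request_free(req);
	out_free_tfm:
		crypto_free_skcipher(tfm);
		return err;
	}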

+ 6 - 0
arch/arm64/crypto/Kconfig

@@ -113,4 +113,10 @@ config CRYPTO_AES_ARM64_BS
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
+config CRYPTO_SPECK_NEON
+	tristate "NEON accelerated Speck cipher algorithms"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SPECK
+
 endif

+ 5 - 3
arch/arm64/crypto/Makefile

@@ -53,20 +53,21 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 
+obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
+speck-neon-y := speck-neon-core.o speck-neon-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
 aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 
-AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
-AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
-
 CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
 
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+ifdef REGENERATE_ARM64_CRYPTO
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
@@ -75,5 +76,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
+endif
 
 .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S

+ 23 - 24
arch/arm64/crypto/aes-ce-ccm-glue.c

@@ -107,11 +107,13 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 }
 
 static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
-			   u32 abytes, u32 *macp, bool use_neon)
+			   u32 abytes, u32 *macp)
 {
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
+		kernel_neon_begin();
 		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
 				     num_rounds(key));
+		kernel_neon_end();
 	} else {
 		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
 			int added = min(abytes, AES_BLOCK_SIZE - *macp);
@@ -143,8 +145,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
 	}
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
-				   bool use_neon)
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -163,7 +164,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
 		ltag.len = 6;
 	}
 
-	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp);
 	scatterwalk_start(&walk, req->src);
 
 	do {
@@ -175,7 +176,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
+		ccm_update_mac(ctx, mac, p, n, &macp);
 		len -= n;
 
 		scatterwalk_unmap(p);
@@ -242,43 +243,42 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
-	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	if (likely(use_neon))
-		kernel_neon_begin();
-
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac, use_neon);
+		ccm_calculate_auth_mac(req, mac);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 			if (walk.nbytes == walk.total)
 				tail = 0;
 
+			kernel_neon_begin();
 			ce_aes_ccm_encrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
+			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk, tail);
 		}
-		if (!err)
+		if (!err) {
+			kernel_neon_begin();
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
-
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
 	}
@@ -301,43 +301,42 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
-	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	if (likely(use_neon))
-		kernel_neon_begin();
-
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac, use_neon);
+		ccm_calculate_auth_mac(req, mac);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 			if (walk.nbytes == walk.total)
 				tail = 0;
 
+			kernel_neon_begin();
 			ce_aes_ccm_decrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
+			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk, tail);
 		}
-		if (!err)
+		if (!err) {
+			kernel_neon_begin();
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
-
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
 	}

+ 46 - 49
arch/arm64/crypto/aes-glue.c

@@ -64,17 +64,17 @@ MODULE_LICENSE("GPL v2");
 
 /* defined in aes-modes.S */
 asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, int first);
+				int rounds, int blocks);
 asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, int first);
+				int rounds, int blocks);
 
 asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 iv[], int first);
+				int rounds, int blocks, u8 iv[]);
 asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 iv[], int first);
+				int rounds, int blocks, u8 iv[]);
 
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 ctr[], int first);
+				int rounds, int blocks, u8 ctr[]);
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
 				int rounds, int blocks, u8 const rk2[], u8 iv[],
@@ -133,19 +133,19 @@ static int ecb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, first);
+				(u8 *)ctx->key_enc, rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -153,19 +153,19 @@ static int ecb_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, first);
+				(u8 *)ctx->key_dec, rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -173,20 +173,19 @@ static int cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -194,20 +193,19 @@ static int cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -215,20 +213,18 @@ static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	first = 1;
-	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-		first = 0;
+		kernel_neon_end();
 	}
 	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
@@ -241,12 +237,13 @@ static int ctr_encrypt(struct skcipher_request *req)
 		 */
 		blocks = -1;
 
+		kernel_neon_begin();
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
-				blocks, walk.iv, first);
+				blocks, walk.iv);
+		kernel_neon_end();
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -270,16 +267,16 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_enc, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -292,16 +289,16 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		kernel_neon_begin();
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_dec, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -425,7 +422,7 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
 
 	/* encrypt the zero vector */
 	kernel_neon_begin();
-	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
+	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
 	kernel_neon_end();
 
 	cmac_gf128_mul_by_x(consts, consts);
@@ -454,8 +451,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 		return err;
 
 	kernel_neon_begin();
-	aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
-	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
+	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
 	kernel_neon_end();
 
 	return cbcmac_setkey(tfm, key, sizeof(key));
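
The recurring change in this file (and in the CCM glue above) is part of the "improve scheduling latency on ARM" item from the pull request: kernel_neon_begin()/kernel_neon_end() bracket a non-preemptible region, so holding it across an entire skcipher walk could stall the scheduler on large requests. The loops are rewritten to bracket each processed chunk instead, roughly following this pattern (a simplified sketch; the helper name is invented, the calls mirror the diff above):

	static int ecb_encrypt_walk(struct skcipher_walk *walk,
				    struct crypto_aes_ctx *ctx, int rounds)
	{
		unsigned int blocks;
		int err = 0;

		while ((blocks = walk->nbytes / AES_BLOCK_SIZE)) {
			kernel_neon_begin();	/* NEON usable from here */
			aes_ecb_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
					(u8 *)ctx->key_enc, rounds, blocks);
			kernel_neon_end();	/* preemption possible again */
			err = skcipher_walk_done(walk,
						 walk->nbytes % AES_BLOCK_SIZE);
		}
		return err;
	}

This also pairs with the switch from skcipher_walk_virt(&walk, req, true) to skcipher_walk_virt(&walk, req, false), presumably because skcipher_walk_done() is no longer called with preemption disabled.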

+ 118 - 237
arch/arm64/crypto/aes-modes.S

@@ -13,127 +13,39 @@
 	.text
 	.align		4
 
-/*
- * There are several ways to instantiate this code:
- * - no interleave, all inline
- * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
- * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
- * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
- * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
- *
- * Macros imported by this code:
- * - enc_prepare	- setup NEON registers for encryption
- * - dec_prepare	- setup NEON registers for decryption
- * - enc_switch_key	- change to new key after having prepared for encryption
- * - encrypt_block	- encrypt a single block
- * - decrypt block	- decrypt a single block
- * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
- * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
- */
-
-#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
-#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
-#define FRAME_POP	ldp x29, x30, [sp],#16
-
-#if INTERLEAVE == 2
-
-aes_encrypt_block2x:
-	encrypt_block2x	v0, v1, w3, x2, x6, w7
-	ret
-ENDPROC(aes_encrypt_block2x)
-
-aes_decrypt_block2x:
-	decrypt_block2x	v0, v1, w3, x2, x6, w7
-	ret
-ENDPROC(aes_decrypt_block2x)
-
-#elif INTERLEAVE == 4
-
 aes_encrypt_block4x:
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_encrypt_block4x)
 
 aes_decrypt_block4x:
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_decrypt_block4x)
 
-#else
-#error INTERLEAVE should equal 2 or 4
-#endif
-
-	.macro		do_encrypt_block2x
-	bl		aes_encrypt_block2x
-	.endm
-
-	.macro		do_decrypt_block2x
-	bl		aes_decrypt_block2x
-	.endm
-
-	.macro		do_encrypt_block4x
-	bl		aes_encrypt_block4x
-	.endm
-
-	.macro		do_decrypt_block4x
-	bl		aes_decrypt_block4x
-	.endm
-
-#else
-#define FRAME_PUSH
-#define FRAME_POP
-
-	.macro		do_encrypt_block2x
-	encrypt_block2x	v0, v1, w3, x2, x6, w7
-	.endm
-
-	.macro		do_decrypt_block2x
-	decrypt_block2x	v0, v1, w3, x2, x6, w7
-	.endm
-
-	.macro		do_encrypt_block4x
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
-	.endm
-
-	.macro		do_decrypt_block4x
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
-	.endm
-
-#endif
-
 	/*
 	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, int first)
+	 *		   int blocks)
 	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, int first)
+	 *		   int blocks)
 	 */
 
 AES_ENTRY(aes_ecb_encrypt)
-	FRAME_PUSH
-	cbz		w5, .LecbencloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	enc_prepare	w3, x2, x5
 
 .LecbencloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lecbenc1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
-	do_encrypt_block2x
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LecbencloopNx
 .Lecbenc1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lecbencout
-#endif
 .Lecbencloop:
 	ld1		{v0.16b}, [x1], #16		/* get next pt block */
 	encrypt_block	v0, w3, x2, x5, w6
@@ -141,35 +53,27 @@ AES_ENTRY(aes_ecb_encrypt)
 	subs		w4, w4, #1
 	bne		.Lecbencloop
 .Lecbencout:
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_encrypt)
 
 
 AES_ENTRY(aes_ecb_decrypt)
-	FRAME_PUSH
-	cbz		w5, .LecbdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	dec_prepare	w3, x2, x5
 
 .LecbdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lecbdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	do_decrypt_block2x
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LecbdecloopNx
 .Lecbdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lecbdecout
-#endif
 .Lecbdecloop:
 	ld1		{v0.16b}, [x1], #16		/* get next ct block */
 	decrypt_block	v0, w3, x2, x5, w6
@@ -177,62 +81,68 @@ AES_ENTRY(aes_ecb_decrypt)
 	subs		w4, w4, #1
 	bne		.Lecbdecloop
 .Lecbdecout:
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_decrypt)
 
 
 	/*
 	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 */
 
 AES_ENTRY(aes_cbc_encrypt)
-	cbz		w6, .Lcbcencloop
-
-	ld1		{v0.16b}, [x5]			/* get iv */
+	ld1		{v4.16b}, [x5]			/* get iv */
 	enc_prepare	w3, x2, x6
 
-.Lcbcencloop:
-	ld1		{v1.16b}, [x1], #16		/* get next pt block */
-	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
+.Lcbcencloop4x:
+	subs		w4, w4, #4
+	bmi		.Lcbcenc1x
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
 	encrypt_block	v0, w3, x2, x6, w7
-	st1		{v0.16b}, [x0], #16
+	eor		v1.16b, v1.16b, v0.16b
+	encrypt_block	v1, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v1.16b
+	encrypt_block	v2, w3, x2, x6, w7
+	eor		v3.16b, v3.16b, v2.16b
+	encrypt_block	v3, w3, x2, x6, w7
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v3.16b
+	b		.Lcbcencloop4x
+.Lcbcenc1x:
+	adds		w4, w4, #4
+	beq		.Lcbcencout
+.Lcbcencloop:
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
+	encrypt_block	v4, w3, x2, x6, w7
+	st1		{v4.16b}, [x0], #16
 	subs		w4, w4, #1
 	bne		.Lcbcencloop
-	st1		{v0.16b}, [x5]			/* return iv */
+.Lcbcencout:
+	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
 
 
 AES_ENTRY(aes_cbc_decrypt)
-	FRAME_PUSH
-	cbz		w6, .LcbcdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v7.16b}, [x5]			/* get iv */
 	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lcbcdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	mov		v2.16b, v0.16b
-	mov		v3.16b, v1.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v7.16b
-	eor		v1.16b, v1.16b, v2.16b
-	mov		v7.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	sub		x1, x1, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
@@ -240,12 +150,10 @@ AES_ENTRY(aes_cbc_decrypt)
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lcbcdecout
-#endif
 .Lcbcdecloop:
 	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
@@ -256,49 +164,33 @@ AES_ENTRY(aes_cbc_decrypt)
 	subs		w4, w4, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	FRAME_POP
 	st1		{v7.16b}, [x5]			/* return iv */
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
 
 
 	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 ctr[], int first)
+	 *		   int blocks, u8 ctr[])
 	 */
 
 AES_ENTRY(aes_ctr_encrypt)
-	FRAME_PUSH
-	cbz		w6, .Lctrnotfirst	/* 1st time around? */
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+
 	enc_prepare	w3, x2, x6
 	ld1		{v4.16b}, [x5]
 
-.Lctrnotfirst:
-	umov		x8, v4.d[1]		/* keep swabbed ctr in reg */
-	rev		x8, x8
-#if INTERLEAVE >= 2
-	cmn		w8, w4			/* 32 bit overflow? */
+	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x6, x6
+	cmn		w6, w4			/* 32 bit overflow? */
 	bcs		.Lctrloop
 .LctrloopNx:
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lctr1x
-#if INTERLEAVE == 2
-	mov		v0.8b, v4.8b
-	mov		v1.8b, v4.8b
-	rev		x7, x8
-	add		x8, x8, #1
-	ins		v0.d[1], x7
-	rev		x7, x8
-	add		x8, x8, #1
-	ins		v1.d[1], x7
-	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
-	do_encrypt_block2x
-	eor		v0.16b, v0.16b, v2.16b
-	eor		v1.16b, v1.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-	dup		v7.4s, w8
+	dup		v7.4s, w6
 	mov		v0.16b, v4.16b
 	add		v7.4s, v7.4s, v8.4s
 	mov		v1.16b, v4.16b
@@ -309,29 +201,27 @@ AES_ENTRY(aes_ctr_encrypt)
 	mov		v2.s[3], v8.s[1]
 	mov		v3.s[3], v8.s[2]
 	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
 	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-	add		x8, x8, #INTERLEAVE
-#endif
-	rev		x7, x8
+	add		x6, x6, #4
+	rev		x7, x6
 	ins		v4.d[1], x7
 	cbz		w4, .Lctrout
 	b		.LctrloopNx
 .Lctr1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lctrout
-#endif
 .Lctrloop:
 	mov		v0.16b, v4.16b
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w3, x2, x8, w7
 
-	adds		x8, x8, #1		/* increment BE ctr */
-	rev		x7, x8
+	adds		x6, x6, #1		/* increment BE ctr */
+	rev		x7, x6
 	ins		v4.d[1], x7
 	bcs		.Lctrcarry		/* overflow? */
 
@@ -345,12 +235,12 @@ AES_ENTRY(aes_ctr_encrypt)
 
 .Lctrout:
 	st1		{v4.16b}, [x5]		/* return next CTR value */
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 
 .Lctrtailblock:
 	st1		{v0.16b}, [x0]
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 
 .Lctrcarry:
@@ -384,39 +274,26 @@ CPU_LE(	.quad		1, 0x87		)
 CPU_BE(	.quad		0x87, 1		)
 
 AES_ENTRY(aes_xts_encrypt)
-	FRAME_PUSH
-	cbz		w7, .LxtsencloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v4.16b}, [x6]
-	enc_prepare	w3, x5, x6
-	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
-	enc_switch_key	w3, x2, x6
+	cbz		w7, .Lxtsencnotfirst
+
+	enc_prepare	w3, x5, x8
+	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+	enc_switch_key	w3, x2, x8
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsencNx
 
+.Lxtsencnotfirst:
+	enc_prepare	w3, x2, x8
 .LxtsencloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsencNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lxtsenc1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
-	next_tweak	v5, v4, v7, v8
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	do_encrypt_block2x
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-	cbz		w4, .LxtsencoutNx
-	next_tweak	v4, v5, v7, v8
-	b		.LxtsencNx
-.LxtsencoutNx:
-	mov		v4.16b, v5.16b
-	b		.Lxtsencout
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
@@ -425,7 +302,7 @@ AES_ENTRY(aes_xts_encrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	next_tweak	v7, v6, v7, v8
 	eor		v3.16b, v3.16b, v7.16b
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
@@ -434,15 +311,13 @@ AES_ENTRY(aes_xts_encrypt)
 	mov		v4.16b, v7.16b
 	cbz		w4, .Lxtsencout
 	b		.LxtsencloopNx
-#endif
 .Lxtsenc1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lxtsencout
-#endif
 .Lxtsencloop:
 	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
@@ -450,45 +325,33 @@ AES_ENTRY(aes_xts_encrypt)
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsencloop
 .Lxtsencout:
-	FRAME_POP
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_encrypt)
 
 
 AES_ENTRY(aes_xts_decrypt)
-	FRAME_PUSH
-	cbz		w7, .LxtsdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
 
 	ld1		{v4.16b}, [x6]
-	enc_prepare	w3, x5, x6
-	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
-	dec_prepare	w3, x2, x6
+	cbz		w7, .Lxtsdecnotfirst
+
+	enc_prepare	w3, x5, x8
+	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+	dec_prepare	w3, x2, x8
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsdecNx
 
+.Lxtsdecnotfirst:
+	dec_prepare	w3, x2, x8
 .LxtsdecloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsdecNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lxtsdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	next_tweak	v5, v4, v7, v8
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-	cbz		w4, .LxtsdecoutNx
-	next_tweak	v4, v5, v7, v8
-	b		.LxtsdecNx
-.LxtsdecoutNx:
-	mov		v4.16b, v5.16b
-	b		.Lxtsdecout
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
@@ -497,7 +360,7 @@ AES_ENTRY(aes_xts_decrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	next_tweak	v7, v6, v7, v8
 	eor		v3.16b, v3.16b, v7.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
@@ -506,15 +369,13 @@ AES_ENTRY(aes_xts_decrypt)
 	mov		v4.16b, v7.16b
 	cbz		w4, .Lxtsdecout
 	b		.LxtsdecloopNx
-#endif
 .Lxtsdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lxtsdecout
-#endif
 .Lxtsdecloop:
 	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	decrypt_block	v0, w3, x2, x6, w7
+	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
@@ -522,7 +383,8 @@ AES_ENTRY(aes_xts_decrypt)
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
-	FRAME_POP
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_decrypt)
 
@@ -533,8 +395,28 @@ AES_ENDPROC(aes_xts_decrypt)
 AES_ENTRY(aes_mac_update)
 	ld1		{v0.16b}, [x4]			/* get dg */
 	enc_prepare	w2, x1, x7
-	cbnz		w5, .Lmacenc
+	cbz		w5, .Lmacloop4x
+
+	encrypt_block	v0, w2, x1, x7, w8
 
+.Lmacloop4x:
+	subs		w3, w3, #4
+	bmi		.Lmac1x
+	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
+	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v2.16b
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v3.16b
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v4.16b
+	cmp		w3, wzr
+	csinv		x5, x6, xzr, eq
+	cbz		w5, .Lmacout
+	encrypt_block	v0, w2, x1, x7, w8
+	b		.Lmacloop4x
+.Lmac1x:
+	add		w3, w3, #4
 .Lmacloop:
 	cbz		w3, .Lmacout
 	ld1		{v1.16b}, [x0], #16		/* get next pt block */
@@ -544,7 +426,6 @@ AES_ENTRY(aes_mac_update)
 	csinv		x5, x6, xzr, eq
 	cbz		w5, .Lmacout
 
-.Lmacenc:
 	encrypt_block	v0, w2, x1, x7, w8
 	b		.Lmacloop
 

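Of the modes touched above, CBC encryption is the one that still cannot be parallelised even after the move to the bl-based 4-way subroutines: each block must be XORed with the previous ciphertext before it is encrypted. As a plain-C sketch of what .Lcbcencloop4x/.Lcbcencloop compute, with aes_encrypt_one() as an assumed stand-in for the encrypt_block macro (declaration only; it is not a kernel symbol):

#include <stdint.h>
#include <string.h>

#define AES_BLOCK_SIZE 16

/* Stand-in for one AES block encryption with the expanded key. */
void aes_encrypt_one(uint8_t blk[AES_BLOCK_SIZE], const uint32_t *rk, int rounds);

void cbc_encrypt_sketch(uint8_t *out, const uint8_t *in, const uint32_t *rk,
			int rounds, int blocks, uint8_t iv[AES_BLOCK_SIZE])
{
	while (blocks-- > 0) {
		for (int i = 0; i < AES_BLOCK_SIZE; i++)
			out[i] = in[i] ^ iv[i];		/* xor with previous ct (or iv) */
		aes_encrypt_one(out, rk, rounds);
		memcpy(iv, out, AES_BLOCK_SIZE);	/* chain into the next block */
		in += AES_BLOCK_SIZE;
		out += AES_BLOCK_SIZE;
	}
	/* iv now holds the last ciphertext block, just as .Lcbcencout stores v4 back through x5 */
}
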
+ 22 - 26
arch/arm64/crypto/aes-neonbs-glue.c

@@ -46,10 +46,9 @@ asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
 
 /* borrowed from aes-neon-blk.ko */
 asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				     int rounds, int blocks, int first);
+				     int rounds, int blocks);
 asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				     int rounds, int blocks, u8 iv[],
-				     int first);
+				     int rounds, int blocks, u8 iv[]);
 
 struct aesbs_ctx {
 	u8	rk[13 * (8 * AES_BLOCK_SIZE) + 32];
@@ -100,9 +99,8 @@ static int __ecb_crypt(struct skcipher_request *req,
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
@@ -110,12 +108,13 @@ static int __ecb_crypt(struct skcipher_request *req,
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
 		   ctx->rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -157,22 +156,21 @@ static int cbc_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	int err, first = 1;
+	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
 		/* fall back to the non-bitsliced NEON implementation */
+		kernel_neon_begin();
 		neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				     ctx->enc, ctx->key.rounds, blocks, walk.iv,
-				     first);
+				     ctx->enc, ctx->key.rounds, blocks,
+				     walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-		first = 0;
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -183,9 +181,8 @@ static int cbc_decrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
@@ -193,13 +190,14 @@ static int cbc_decrypt(struct skcipher_request *req)
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->key.rk, ctx->key.rounds, blocks,
 				  walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -231,9 +229,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 	u8 buf[AES_BLOCK_SIZE];
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -244,8 +241,10 @@ static int ctr_encrypt(struct skcipher_request *req)
 			final = NULL;
 		}
 
+		kernel_neon_begin();
 		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+		kernel_neon_end();
 
 		if (final) {
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -260,8 +259,6 @@ static int ctr_encrypt(struct skcipher_request *req)
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
-
 	return err;
 }
 
@@ -306,12 +303,11 @@ static int __xts_crypt(struct skcipher_request *req,
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	kernel_neon_begin();
-
-	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
-			     ctx->key.rounds, 1, 1);
+	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1);
+	kernel_neon_end();
 
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -320,13 +316,13 @@ static int __xts_crypt(struct skcipher_request *req,
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
 		   ctx->key.rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
-
 	return err;
 }
 

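The structural change in this glue code (and in the chacha20 and sha256 glue below) is that kernel_neon_begin()/kernel_neon_end() now bracket only the assembler call, and the walk is started with atomic=false so skcipher_walk_done() may sleep between chunks. A minimal sketch of the resulting shape; do_asm_crypt() is a placeholder, and the real functions additionally round blocks down to a multiple of walk.stride when more data follows:

static int crypt_with_walk(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);	/* non-atomic walk */

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		kernel_neon_begin();	/* hold NEON only around the asm call */
		do_asm_crypt(walk.dst.virt.addr, walk.src.virt.addr, blocks);
		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}
	return err;
}
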
+ 9 - 3
arch/arm64/crypto/chacha20-neon-glue.c

@@ -37,12 +37,19 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 	u8 buf[CHACHA20_BLOCK_SIZE];
 
 	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+		kernel_neon_begin();
 		chacha20_4block_xor_neon(state, dst, src);
+		kernel_neon_end();
 		bytes -= CHACHA20_BLOCK_SIZE * 4;
 		src += CHACHA20_BLOCK_SIZE * 4;
 		dst += CHACHA20_BLOCK_SIZE * 4;
 		state[12] += 4;
 	}
+
+	if (!bytes)
+		return;
+
+	kernel_neon_begin();
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block_xor_neon(state, dst, src);
 		bytes -= CHACHA20_BLOCK_SIZE;
@@ -55,6 +62,7 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 		chacha20_block_xor_neon(state, buf, buf);
 		memcpy(dst, buf, bytes);
 	}
+	kernel_neon_end();
 }
 
 static int chacha20_neon(struct skcipher_request *req)
@@ -68,11 +76,10 @@ static int chacha20_neon(struct skcipher_request *req)
 	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	crypto_chacha20_init(state, ctx, walk.iv);
 
-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
 
@@ -83,7 +90,6 @@ static int chacha20_neon(struct skcipher_request *req)
 				nbytes);
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
-	kernel_neon_end();
 
 	return err;
 }

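The tail of chacha20_doneon() deals with a final partial block by bouncing it through a full-sized buffer, so the keystream XOR never reads or writes past the caller's data. A stripped-down sketch of that step, with chacha20_block_xor() standing in for the NEON single-block helper (declaration only):

#include <stdint.h>
#include <string.h>

#define CHACHA20_BLOCK_SIZE 64

/* Stand-in for chacha20_block_xor_neon(). */
void chacha20_block_xor(uint32_t state[16], uint8_t *dst, const uint8_t *src);

void chacha20_xor_tail(uint32_t state[16], uint8_t *dst, const uint8_t *src,
		       unsigned int bytes)
{
	uint8_t buf[CHACHA20_BLOCK_SIZE];

	if (!bytes || bytes >= CHACHA20_BLOCK_SIZE)
		return;				/* full blocks handled elsewhere */

	memcpy(buf, src, bytes);		/* pad the short input to a block */
	chacha20_block_xor(state, buf, buf);	/* XOR a whole block of keystream */
	memcpy(dst, buf, bytes);		/* keep only the valid bytes */
}
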
+ 23 - 13
arch/arm64/crypto/sha256-glue.c

@@ -89,21 +89,32 @@ static struct shash_alg algs[] = { {
 static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
 			      unsigned int len)
 {
-	/*
-	 * Stacking and unstacking a substantial slice of the NEON register
-	 * file may significantly affect performance for small updates when
-	 * executing in interrupt context, so fall back to the scalar code
-	 * in that case.
-	 */
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
 	if (!may_use_simd())
 		return sha256_base_do_update(desc, data, len,
 				(sha256_block_fn *)sha256_block_data_order);
 
-	kernel_neon_begin();
-	sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_neon);
-	kernel_neon_end();
+	while (len > 0) {
+		unsigned int chunk = len;
+
+		/*
+		 * Don't hog the CPU for the entire time it takes to process all
+		 * input when running on a preemptible kernel, but process the
+		 * data block by block instead.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT) &&
+		    chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
+			chunk = SHA256_BLOCK_SIZE -
+				sctx->count % SHA256_BLOCK_SIZE;
 
+		kernel_neon_begin();
+		sha256_base_do_update(desc, data, chunk,
+				      (sha256_block_fn *)sha256_block_neon);
+		kernel_neon_end();
+		data += chunk;
+		len -= chunk;
+	}
 	return 0;
 }
 
@@ -117,10 +128,9 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
 		sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_data_order);
 	} else {
-		kernel_neon_begin();
 		if (len)
-			sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_neon);
+			sha256_update_neon(desc, data, len);
+		kernel_neon_begin();
 		sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_neon);
 		kernel_neon_end();

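The loop added to sha256_update_neon() caps each kernel_neon_begin()/end() section at roughly one SHA-256 block on preemptible kernels by first finishing the block that is already partially buffered. A self-contained sketch of just that arithmetic (the CONFIG_PREEMPT check and the actual hashing are omitted):

#include <stdio.h>

#define SHA256_BLOCK_SIZE 64

int main(void)
{
	unsigned int count = 40;	/* bytes already buffered, like sctx->count */
	unsigned int len = 200;		/* bytes passed to this update call */

	while (len > 0) {
		unsigned int chunk = len;

		/* Stop at the next block boundary so each NEON section stays short. */
		if (chunk + count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
			chunk = SHA256_BLOCK_SIZE - count % SHA256_BLOCK_SIZE;

		printf("hash %u bytes inside kernel_neon_begin/end\n", chunk);
		count += chunk;
		len -= chunk;
	}
	return 0;
}

With the values above this processes 24, 64, 64 and 48 bytes, i.e. it completes the buffered block and then proceeds block by block.
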
+ 352 - 0
arch/arm64/crypto/speck-neon-core.S

@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+
+	// arguments
+	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
+	NROUNDS		.req	w1	// int nrounds
+	NROUNDS_X	.req	x1
+	DST		.req	x2	// void *dst
+	SRC		.req	x3	// const void *src
+	NBYTES		.req	w4	// unsigned int nbytes
+	TWEAK		.req	x5	// void *tweak
+
+	// registers which hold the data being encrypted/decrypted
+	// (underscores avoid a naming collision with ARM64 registers x0-x3)
+	X_0		.req	v0
+	Y_0		.req	v1
+	X_1		.req	v2
+	Y_1		.req	v3
+	X_2		.req	v4
+	Y_2		.req	v5
+	X_3		.req	v6
+	Y_3		.req	v7
+
+	// the round key, duplicated in all lanes
+	ROUND_KEY	.req	v8
+
+	// index vector for tbl-based 8-bit rotates
+	ROTATE_TABLE	.req	v9
+	ROTATE_TABLE_Q	.req	q9
+
+	// temporary registers
+	TMP0		.req	v10
+	TMP1		.req	v11
+	TMP2		.req	v12
+	TMP3		.req	v13
+
+	// multiplication table for updating XTS tweaks
+	GFMUL_TABLE	.req	v14
+	GFMUL_TABLE_Q	.req	q14
+
+	// next XTS tweak value(s)
+	TWEAKV_NEXT	.req	v15
+
+	// XTS tweaks for the blocks currently being encrypted/decrypted
+	TWEAKV0		.req	v16
+	TWEAKV1		.req	v17
+	TWEAKV2		.req	v18
+	TWEAKV3		.req	v19
+	TWEAKV4		.req	v20
+	TWEAKV5		.req	v21
+	TWEAKV6		.req	v22
+	TWEAKV7		.req	v23
+
+	.align		4
+.Lror64_8_table:
+	.octa		0x080f0e0d0c0b0a090007060504030201
+.Lror32_8_table:
+	.octa		0x0c0f0e0d080b0a090407060500030201
+.Lrol64_8_table:
+	.octa		0x0e0d0c0b0a09080f0605040302010007
+.Lrol32_8_table:
+	.octa		0x0e0d0c0f0a09080b0605040702010003
+.Lgf128mul_table:
+	.octa		0x00000000000000870000000000000001
+.Lgf64mul_table:
+	.octa		0x0000000000000000000000002d361b00
+
+/*
+ * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
+ *
+ * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
+ * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
+ * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
+ * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
+ */
+.macro _speck_round_128bytes	n, lanes
+
+	// x = ror(x, 8)
+	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
+	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
+	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
+	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
+
+	// x += y
+	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
+	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
+	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
+	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
+
+	// x ^= k
+	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
+	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
+	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
+	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
+
+	// y = rol(y, 3)
+	shl		TMP0.\lanes, Y_0.\lanes, #3
+	shl		TMP1.\lanes, Y_1.\lanes, #3
+	shl		TMP2.\lanes, Y_2.\lanes, #3
+	shl		TMP3.\lanes, Y_3.\lanes, #3
+	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
+	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
+	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
+	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
+
+	// y ^= x
+	eor		Y_0.16b, TMP0.16b, X_0.16b
+	eor		Y_1.16b, TMP1.16b, X_1.16b
+	eor		Y_2.16b, TMP2.16b, X_2.16b
+	eor		Y_3.16b, TMP3.16b, X_3.16b
+.endm
+
+/*
+ * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
+ *
+ * This is the inverse of _speck_round_128bytes().
+ */
+.macro _speck_unround_128bytes	n, lanes
+
+	// y ^= x
+	eor		TMP0.16b, Y_0.16b, X_0.16b
+	eor		TMP1.16b, Y_1.16b, X_1.16b
+	eor		TMP2.16b, Y_2.16b, X_2.16b
+	eor		TMP3.16b, Y_3.16b, X_3.16b
+
+	// y = ror(y, 3)
+	ushr		Y_0.\lanes, TMP0.\lanes, #3
+	ushr		Y_1.\lanes, TMP1.\lanes, #3
+	ushr		Y_2.\lanes, TMP2.\lanes, #3
+	ushr		Y_3.\lanes, TMP3.\lanes, #3
+	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
+	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
+	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
+	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
+
+	// x ^= k
+	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
+	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
+	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
+	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
+
+	// x -= y
+	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
+	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
+	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
+	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
+
+	// x = rol(x, 8)
+	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
+	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
+	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
+	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
+.endm
+
+.macro _next_xts_tweak	next, cur, tmp, n
+.if \n == 64
+	/*
+	 * Calculate the next tweak by multiplying the current one by x,
+	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
+	 */
+	sshr		\tmp\().2d, \cur\().2d, #63
+	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
+	shl		\next\().2d, \cur\().2d, #1
+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+	eor		\next\().16b, \next\().16b, \tmp\().16b
+.else
+	/*
+	 * Calculate the next two tweaks by multiplying the current ones by x^2,
+	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
+	 */
+	ushr		\tmp\().2d, \cur\().2d, #62
+	shl		\next\().2d, \cur\().2d, #2
+	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
+	eor		\next\().16b, \next\().16b, \tmp\().16b
+.endif
+.endm
+
+/*
+ * _speck_xts_crypt() - Speck-XTS encryption/decryption
+ *
+ * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
+ * using Speck-XTS, specifically the variant with a block size of '2n' and round
+ * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
+ * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
+ * nonzero multiple of 128.
+ */
+.macro _speck_xts_crypt	n, lanes, decrypting
+
+	/*
+	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
+	 * round key rather than the first, since for decryption the round keys
+	 * are used in reverse order.
+	 */
+.if \decrypting
+	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
+.if \n == 64
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
+	sub		ROUND_KEYS, ROUND_KEYS, #8
+.else
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
+	sub		ROUND_KEYS, ROUND_KEYS, #4
+.endif
+.endif
+
+	// Load the index vector for tbl-based 8-bit rotates
+.if \decrypting
+	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
+.else
+	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
+.endif
+
+	// One-time XTS preparation
+.if \n == 64
+	// Load first tweak
+	ld1		{TWEAKV0.16b}, [TWEAK]
+
+	// Load GF(2^128) multiplication table
+	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
+.else
+	// Load first tweak
+	ld1		{TWEAKV0.8b}, [TWEAK]
+
+	// Load GF(2^64) multiplication table
+	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
+
+	// Calculate second tweak, packing it together with the first
+	ushr		TMP0.2d, TWEAKV0.2d, #63
+	shl		TMP1.2d, TWEAKV0.2d, #1
+	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
+	eor		TMP0.8b, TMP0.8b, TMP1.8b
+	mov		TWEAKV0.d[1], TMP0.d[0]
+.endif
+
+.Lnext_128bytes_\@:
+
+	// Calculate XTS tweaks for next 128 bytes
+	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
+	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
+	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
+	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
+	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
+	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
+	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
+	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
+
+	// Load the next source blocks into {X,Y}[0-3]
+	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
+	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
+
+	// XOR the source blocks with their XTS tweaks
+	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
+	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
+	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
+	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
+	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
+	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
+	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
+	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
+
+	/*
+	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
+	 * that the X[0-3] registers contain only the second halves of blocks,
+	 * and the Y[0-3] registers contain only the first halves of blocks.
+	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
+	 */
+	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
+	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
+	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
+	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
+	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
+	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
+	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
+	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
+
+	// Do the cipher rounds
+	mov		x6, ROUND_KEYS
+	mov		w7, NROUNDS
+.Lnext_round_\@:
+.if \decrypting
+	ld1r		{ROUND_KEY.\lanes}, [x6]
+	sub		x6, x6, #( \n / 8 )
+	_speck_unround_128bytes	\n, \lanes
+.else
+	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
+	_speck_round_128bytes	\n, \lanes
+.endif
+	subs		w7, w7, #1
+	bne		.Lnext_round_\@
+
+	// Re-interleave the 'x' and 'y' elements of each block
+	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
+	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
+	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
+	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
+	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
+	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
+	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
+	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
+
+	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
+	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
+	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
+	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
+	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
+	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
+	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
+	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
+	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
+	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
+
+	// Store the ciphertext in the destination buffer
+	st1		{X_0.16b-Y_1.16b}, [DST], #64
+	st1		{X_2.16b-Y_3.16b}, [DST], #64
+
+	// Continue if there are more 128-byte chunks remaining
+	subs		NBYTES, NBYTES, #128
+	bne		.Lnext_128bytes_\@
+
+	// Store the next tweak and return
+.if \n == 64
+	st1		{TWEAKV_NEXT.16b}, [TWEAK]
+.else
+	st1		{TWEAKV_NEXT.8b}, [TWEAK]
+.endif
+	ret
+.endm
+
+ENTRY(speck128_xts_encrypt_neon)
+	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
+ENDPROC(speck128_xts_encrypt_neon)
+
+ENTRY(speck128_xts_decrypt_neon)
+	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
+ENDPROC(speck128_xts_decrypt_neon)
+
+ENTRY(speck64_xts_encrypt_neon)
+	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
+ENDPROC(speck64_xts_encrypt_neon)
+
+ENTRY(speck64_xts_decrypt_neon)
+	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
+ENDPROC(speck64_xts_decrypt_neon)

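The NEON macros above apply the same two-step Speck round to eight (or sixteen) blocks at once; the tbl/shl/sri sequences may be easier to follow against a scalar reference. This is a sketch for one Speck128 block, not the kernel's generic implementation:

#include <stdint.h>
#include <stdio.h>

static inline uint64_t rol64(uint64_t v, unsigned int n) { return (v << n) | (v >> (64 - n)); }
static inline uint64_t ror64(uint64_t v, unsigned int n) { return (v >> n) | (v << (64 - n)); }

/* One Speck128 round, as in _speck_round_128bytes (scalar, one block). */
static void speck128_round(uint64_t *x, uint64_t *y, uint64_t k)
{
	*x = (ror64(*x, 8) + *y) ^ k;
	*y = rol64(*y, 3) ^ *x;
}

/* Its inverse, as in _speck_unround_128bytes. */
static void speck128_unround(uint64_t *x, uint64_t *y, uint64_t k)
{
	*y = ror64(*y ^ *x, 3);
	*x = rol64((*x ^ k) - *y, 8);
}

int main(void)
{
	uint64_t x = 0x0123456789abcdefULL, y = 0xfedcba9876543210ULL;
	uint64_t k = 0x1111111111111111ULL, x0 = x, y0 = y;

	speck128_round(&x, &y, k);
	speck128_unround(&x, &y, k);
	printf("round/unround are inverses: %s\n", (x == x0 && y == y0) ? "yes" : "no");
	return 0;
}
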
+ 282 - 0
arch/arm64/crypto/speck-neon-glue.c

@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ * (64-bit version; based on the 32-bit version)
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/speck.h>
+#include <crypto/xts.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* The assembly functions only handle multiples of 128 bytes */
+#define SPECK_NEON_CHUNK_SIZE	128
+
+/* Speck128 */
+
+struct speck128_xts_tfm_ctx {
+	struct speck128_tfm_ctx main_key;
+	struct speck128_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
+				     u8 *, const u8 *);
+typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
+					  const void *, unsigned int, void *);
+
+static __always_inline int
+__speck128_xts_crypt(struct skcipher_request *req,
+		     speck128_crypt_one_t crypt_one,
+		     speck128_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	le128 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
+			gf128mul_x_ble(&tweak, &tweak);
+
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck128_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
+				    speck128_xts_encrypt_neon);
+}
+
+static int speck128_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
+				    speck128_xts_decrypt_neon);
+}
+
+static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+/* Speck64 */
+
+struct speck64_xts_tfm_ctx {
+	struct speck64_tfm_ctx main_key;
+	struct speck64_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
+				    u8 *, const u8 *);
+typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
+					 const void *, unsigned int, void *);
+
+static __always_inline int
+__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
+		    speck64_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	__le64 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			*(__le64 *)dst = *(__le64 *)src ^ tweak;
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			*(__le64 *)dst ^= tweak;
+			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
+					    ((tweak & cpu_to_le64(1ULL << 63)) ?
+					     0x1B : 0));
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck64_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
+				   speck64_xts_encrypt_neon);
+}
+
+static int speck64_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
+				   speck64_xts_decrypt_neon);
+}
+
+static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+static struct skcipher_alg speck_algs[] = {
+	{
+		.base.cra_name		= "xts(speck128)",
+		.base.cra_driver_name	= "xts-speck128-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
+		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
+		.ivsize			= SPECK128_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck128_xts_setkey,
+		.encrypt		= speck128_xts_encrypt,
+		.decrypt		= speck128_xts_decrypt,
+	}, {
+		.base.cra_name		= "xts(speck64)",
+		.base.cra_driver_name	= "xts-speck64-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
+		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
+		.ivsize			= SPECK64_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck64_xts_setkey,
+		.encrypt		= speck64_xts_encrypt,
+		.decrypt		= speck64_xts_decrypt,
+	}
+};
+
+static int __init speck_neon_module_init(void)
+{
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_neon_module_exit(void)
+{
+	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_neon_module_init);
+module_exit(speck_neon_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("xts(speck128)");
+MODULE_ALIAS_CRYPTO("xts-speck128-neon");
+MODULE_ALIAS_CRYPTO("xts(speck64)");
+MODULE_ALIAS_CRYPTO("xts-speck64-neon");

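The remainder loop in __speck64_xts_crypt() advances the tweak one block at a time by multiplying it by x in GF(2^64) with reducing polynomial x^64 + x^4 + x^3 + x + 1, i.e. XORing in 0x1B when the top bit carries out. The kernel code does this on a __le64; on a host-endian value the update is simply:

#include <stdint.h>
#include <stdio.h>

/* Multiply a GF(2^64) tweak by x, mod x^64 + x^4 + x^3 + x + 1. */
static uint64_t gf64_mul_x(uint64_t t)
{
	return (t << 1) ^ ((t & (1ULL << 63)) ? 0x1B : 0);
}

int main(void)
{
	uint64_t t = 0x8000000000000001ULL;

	printf("next tweak: 0x%016llx\n", (unsigned long long)gf64_mul_x(t));
	return 0;
}

The Speck128 remainder path uses gf128mul_x_ble() for the analogous GF(2^128) step.
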
+ 693 - 721
arch/x86/crypto/aesni-intel_asm.S

@@ -94,23 +94,30 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 
 
 #define	STACK_OFFSET    8*3
-#define	HashKey		16*0	// store HashKey <<1 mod poly here
-#define	HashKey_2	16*1	// store HashKey^2 <<1 mod poly here
-#define	HashKey_3	16*2	// store HashKey^3 <<1 mod poly here
-#define	HashKey_4	16*3	// store HashKey^4 <<1 mod poly here
-#define	HashKey_k	16*4	// store XOR of High 64 bits and Low 64
+
+#define AadHash 16*0
+#define AadLen 16*1
+#define InLen (16*1)+8
+#define PBlockEncKey 16*2
+#define OrigIV 16*3
+#define CurCount 16*4
+#define PBlockLen 16*5
+#define	HashKey		16*6	// store HashKey <<1 mod poly here
+#define	HashKey_2	16*7	// store HashKey^2 <<1 mod poly here
+#define	HashKey_3	16*8	// store HashKey^3 <<1 mod poly here
+#define	HashKey_4	16*9	// store HashKey^4 <<1 mod poly here
+#define	HashKey_k	16*10	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey <<1 mod poly here
 				//(for Karatsuba purposes)
-#define	HashKey_2_k	16*5	// store XOR of High 64 bits and Low 64
+#define	HashKey_2_k	16*11	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^2 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	HashKey_3_k	16*6	// store XOR of High 64 bits and Low 64
+#define	HashKey_3_k	16*12	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^3 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	HashKey_4_k	16*7	// store XOR of High 64 bits and Low 64
+#define	HashKey_4_k	16*13	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^4 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	VARIABLE_OFFSET	16*8
 
 #define arg1 rdi
 #define arg2 rsi
@@ -118,10 +125,11 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define arg4 rcx
 #define arg5 r8
 #define arg6 r9
-#define arg7 STACK_OFFSET+8(%r14)
-#define arg8 STACK_OFFSET+16(%r14)
-#define arg9 STACK_OFFSET+24(%r14)
-#define arg10 STACK_OFFSET+32(%r14)
+#define arg7 STACK_OFFSET+8(%rsp)
+#define arg8 STACK_OFFSET+16(%rsp)
+#define arg9 STACK_OFFSET+24(%rsp)
+#define arg10 STACK_OFFSET+32(%rsp)
+#define arg11 STACK_OFFSET+40(%rsp)
 #define keysize 2*15*16(%arg1)
 #endif
 
@@ -171,6 +179,332 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define TKEYP	T1
 #endif
 
+.macro FUNC_SAVE
+	push	%r12
+	push	%r13
+	push	%r14
+#
+# states of %xmm registers %xmm6:%xmm15 not saved
+# all %xmm registers are clobbered
+#
+.endm
+
+
+.macro FUNC_RESTORE
+	pop	%r14
+	pop	%r13
+	pop	%r12
+.endm
+
+# Precompute hashkeys.
+# Input: Hash subkey.
+# Output: HashKeys stored in gcm_context_data.  Only needs to be called
+# once per key.
+# clobbers r12, and tmp xmm registers.
+.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
+	mov	\SUBKEY, %r12
+	movdqu	(%r12), \TMP3
+	movdqa	SHUF_MASK(%rip), \TMP2
+	PSHUFB_XMM \TMP2, \TMP3
+
+	# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
+
+	movdqa	\TMP3, \TMP2
+	psllq	$1, \TMP3
+	psrlq	$63, \TMP2
+	movdqa	\TMP2, \TMP1
+	pslldq	$8, \TMP2
+	psrldq	$8, \TMP1
+	por	\TMP2, \TMP3
+
+	# reduce HashKey<<1
+
+	pshufd	$0x24, \TMP1, \TMP2
+	pcmpeqd TWOONE(%rip), \TMP2
+	pand	POLY(%rip), \TMP2
+	pxor	\TMP2, \TMP3
+	movdqa	\TMP3, HashKey(%arg2)
+
+	movdqa	   \TMP3, \TMP5
+	pshufd	   $78, \TMP3, \TMP1
+	pxor	   \TMP3, \TMP1
+	movdqa	   \TMP1, HashKey_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_2(%arg2)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_2_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_3(%arg2)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_3_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^4<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_4(%arg2)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_4_k(%arg2)
+.endm
+
+# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
+# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
+.macro GCM_INIT Iv SUBKEY AAD AADLEN
+	mov \AADLEN, %r11
+	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
+	xor %r11, %r11
+	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
+	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
+	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
+	mov \Iv, %rax
+	movdqu (%rax), %xmm0
+	movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
+
+	movdqa  SHUF_MASK(%rip), %xmm2
+	PSHUFB_XMM %xmm2, %xmm0
+	movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
+
+	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+	movdqa HashKey(%arg2), %xmm13
+
+	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
+	%xmm4, %xmm5, %xmm6
+.endm
+
+# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
+# struct has been initialized by GCM_INIT.
+# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
+# Clobbers rax, r10-r13, and xmm0-xmm15
+.macro GCM_ENC_DEC operation
+	movdqu AadHash(%arg2), %xmm8
+	movdqu HashKey(%arg2), %xmm13
+	add %arg5, InLen(%arg2)
+
+	xor %r11, %r11 # initialise the data pointer offset as zero
+	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
+
+	sub %r11, %arg5		# sub partial block data used
+	mov %arg5, %r13		# save the number of bytes
+
+	and $-16, %r13		# %r13 = %r13 - (%r13 mod 16)
+	mov %r13, %r12
+	# Encrypt/Decrypt first few blocks
+
+	and	$(3<<4), %r12
+	jz	_initial_num_blocks_is_0_\@
+	cmp	$(2<<4), %r12
+	jb	_initial_num_blocks_is_1_\@
+	je	_initial_num_blocks_is_2_\@
+_initial_num_blocks_is_3_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
+	sub	$48, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_2_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
+	sub	$32, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_1_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
+	sub	$16, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_0_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
+_initial_blocks_\@:
+
+	# Main loop - Encrypt/Decrypt remaining blocks
+
+	cmp	$0, %r13
+	je	_zero_cipher_left_\@
+	sub	$64, %r13
+	je	_four_cipher_left_\@
+_crypt_by_4_\@:
+	GHASH_4_ENCRYPT_4_PARALLEL_\operation	%xmm9, %xmm10, %xmm11, %xmm12, \
+	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
+	%xmm7, %xmm8, enc
+	add	$64, %r11
+	sub	$64, %r13
+	jne	_crypt_by_4_\@
+_four_cipher_left_\@:
+	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_\@:
+	movdqu %xmm8, AadHash(%arg2)
+	movdqu %xmm0, CurCount(%arg2)
+
+	mov	%arg5, %r13
+	and	$15, %r13			# %r13 = arg5 (mod 16)
+	je	_multiple_of_16_bytes_\@
+
+	mov %r13, PBlockLen(%arg2)
+
+	# Handle the last <16 Byte block separately
+	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
+	movdqu %xmm0, CurCount(%arg2)
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm0
+
+	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
+	movdqu %xmm0, PBlockEncKey(%arg2)
+
+	cmp	$16, %arg5
+	jge _large_enough_update_\@
+
+	lea (%arg4,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+	jmp _data_read_\@
+
+_large_enough_update_\@:
+	sub	$16, %r11
+	add	%r13, %r11
+
+	# receive the last <16 Byte block
+	movdqu	(%arg4, %r11, 1), %xmm1
+
+	sub	%r13, %r11
+	add	$16, %r11
+
+	lea	SHIFT_MASK+16(%rip), %r12
+	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
+	# (r13 is the number of bytes in plaintext mod 16)
+	sub	%r13, %r12
+	# get the appropriate shuffle mask
+	movdqu	(%r12), %xmm2
+	# shift right 16-r13 bytes
+	PSHUFB_XMM  %xmm2, %xmm1
+
+_data_read_\@:
+	lea ALL_F+16(%rip), %r12
+	sub %r13, %r12
+
+.ifc \operation, dec
+	movdqa  %xmm1, %xmm2
+.endif
+	pxor	%xmm1, %xmm0            # XOR Encrypt(K, Yn)
+	movdqu	(%r12), %xmm1
+	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
+	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
+.ifc \operation, dec
+	pand    %xmm1, %xmm2
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10 ,%xmm2
+
+	pxor %xmm2, %xmm8
+.else
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10,%xmm0
+
+	pxor	%xmm0, %xmm8
+.endif
+
+	movdqu %xmm8, AadHash(%arg2)
+.ifc \operation, enc
+	# GHASH computation for the last <16 byte block
+	movdqa SHUF_MASK(%rip), %xmm10
+	# shuffle xmm0 back to output as ciphertext
+	PSHUFB_XMM %xmm10, %xmm0
+.endif
+
+	# Output %r13 bytes
+	MOVQ_R64_XMM %xmm0, %rax
+	cmp $8, %r13
+	jle _less_than_8_bytes_left_\@
+	mov %rax, (%arg3 , %r11, 1)
+	add $8, %r11
+	psrldq $8, %xmm0
+	MOVQ_R64_XMM %xmm0, %rax
+	sub $8, %r13
+_less_than_8_bytes_left_\@:
+	mov %al,  (%arg3, %r11, 1)
+	add $1, %r11
+	shr $8, %rax
+	sub $1, %r13
+	jne _less_than_8_bytes_left_\@
+_multiple_of_16_bytes_\@:
+.endm
+
+# GCM_COMPLETE Finishes update of tag of last partial block
+# Output: Authorization Tag (AUTH_TAG)
+# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
+.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
+	movdqu AadHash(%arg2), %xmm8
+	movdqu HashKey(%arg2), %xmm13
+
+	mov PBlockLen(%arg2), %r12
+
+	cmp $0, %r12
+	je _partial_done\@
+
+	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+
+_partial_done\@:
+	mov AadLen(%arg2), %r12  # %r12 = aadLen (number of bytes)
+	shl	$3, %r12		  # convert into number of bits
+	movd	%r12d, %xmm15		  # len(A) in %xmm15
+	mov InLen(%arg2), %r12
+	shl     $3, %r12                  # len(C) in bits (*128)
+	MOVQ_R64_XMM    %r12, %xmm1
+
+	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
+	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
+	pxor	%xmm15, %xmm8
+	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+	# final GHASH computation
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm8
+
+	movdqu OrigIV(%arg2), %xmm0       # %xmm0 = Y0
+	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
+	pxor	%xmm8, %xmm0
+_return_T_\@:
+	mov	\AUTHTAG, %r10                     # %r10 = authTag
+	mov	\AUTHTAGLEN, %r11                    # %r11 = auth_tag_len
+	cmp	$16, %r11
+	je	_T_16_\@
+	cmp	$8, %r11
+	jl	_T_4_\@
+_T_8_\@:
+	MOVQ_R64_XMM	%xmm0, %rax
+	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
+	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_\@
+_T_4_\@:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_\@
+_T_123_\@:
+	movd	%xmm0, %eax
+	cmp	$2, %r11
+	jl	_T_1_\@
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_\@
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_\@:
+	mov	%al, (%r10)
+	jmp	_return_T_done_\@
+_T_16_\@:
+	movdqu	%xmm0, (%r10)
+_return_T_done_\@:
+.endm
 
 #ifdef __x86_64__
 /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
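
One note on GCM_COMPLETE above before the GHASH_MUL code that follows: the _T_16/_T_8/_T_4/_T_123/_T_1 ladder just emits the requested number of tag bytes with progressively smaller stores. For the tag lengths the GCM code accepts (4 to 16 bytes) it is equivalent to this sketch:

#include <string.h>

void write_auth_tag(void *auth_tag, const void *tag, unsigned long auth_tag_len)
{
	if (auth_tag_len > 16)
		auth_tag_len = 16;
	memcpy(auth_tag, tag, auth_tag_len);	/* 8/4/2/1-byte stores in the asm */
}
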
@@ -264,232 +598,188 @@ _read_next_byte_lt8_\@:
 _done_read_partial_block_\@:
 .endm
 
-/*
-* if a = number of total plaintext bytes
-* b = floor(a/16)
-* num_initial_blocks = b mod 4
-* encrypt the initial num_initial_blocks blocks and apply ghash on
-* the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
-* are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
-*/
-
-
-.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
-        MOVADQ     SHUF_MASK(%rip), %xmm14
-	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r11           # %r11 = aadLen
-	pxor	   %xmm\i, %xmm\i
-	pxor       \XMM2, \XMM2
+# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
+# clobbers r10-11, xmm14
+.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
+	TMP6 TMP7
+	MOVADQ	   SHUF_MASK(%rip), %xmm14
+	mov	   \AAD, %r10		# %r10 = AAD
+	mov	   \AADLEN, %r11		# %r11 = aadLen
+	pxor	   \TMP7, \TMP7
+	pxor	   \TMP6, \TMP6
 
 	cmp	   $16, %r11
-	jl	   _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
-	movdqu	   (%r10), %xmm\i
-	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   %xmm\i, \XMM2
-	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	jl	   _get_AAD_rest\@
+_get_AAD_blocks\@:
+	movdqu	   (%r10), \TMP7
+	PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+	pxor	   \TMP7, \TMP6
+	GHASH_MUL  \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
 	add	   $16, %r10
 	sub	   $16, %r11
-	cmp	   $16, %r11
-	jge	   _get_AAD_blocks\num_initial_blocks\operation
-
-	movdqu	   \XMM2, %xmm\i
-
-	/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	je	   _get_AAD_done\num_initial_blocks\operation
-
-	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   \XMM2, %xmm\i
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-
-_get_AAD_done\num_initial_blocks\operation:
-	xor	   %r11, %r11 # initialise the data pointer offset as zero
-	# start AES for num_initial_blocks blocks
-
-	mov	   %arg5, %rax                      # %rax = *Y0
-	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
-	PSHUFB_XMM   %xmm14, \XMM0
-
-.if (\i == 5) || (\i == 6) || (\i == 7)
-	MOVADQ		ONE(%RIP),\TMP1
-	MOVADQ		(%arg1),\TMP2
-.irpc index, \i_seq
-	paddd	   \TMP1, \XMM0                 # INCR Y0
-	movdqa	   \XMM0, %xmm\index
-	PSHUFB_XMM   %xmm14, %xmm\index      # perform a 16 byte swap
-	pxor	   \TMP2, %xmm\index
-.endr
-	lea	0x10(%arg1),%r10
-	mov	keysize,%eax
-	shr	$2,%eax				# 128->4, 192->6, 256->8
-	add	$5,%eax			      # 128->9, 192->11, 256->13
-
-aes_loop_initial_dec\num_initial_blocks:
-	MOVADQ	(%r10),\TMP1
-.irpc	index, \i_seq
-	AESENC	\TMP1, %xmm\index
-.endr
-	add	$16,%r10
-	sub	$1,%eax
-	jnz	aes_loop_initial_dec\num_initial_blocks
-
-	MOVADQ	(%r10), \TMP1
-.irpc index, \i_seq
-	AESENCLAST \TMP1, %xmm\index         # Last Round
-.endr
-.irpc index, \i_seq
-	movdqu	   (%arg3 , %r11, 1), \TMP1
-	pxor	   \TMP1, %xmm\index
-	movdqu	   %xmm\index, (%arg2 , %r11, 1)
-	# write back plaintext/ciphertext for num_initial_blocks
-	add	   $16, %r11
-
-	movdqa     \TMP1, %xmm\index
-	PSHUFB_XMM	   %xmm14, %xmm\index
-                # prepare plaintext/ciphertext for GHASH computation
-.endr
-.endif
-
-        # apply GHASH on num_initial_blocks blocks
-
-.if \i == 5
-        pxor       %xmm5, %xmm6
-	GHASH_MUL  %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm6, %xmm7
-	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 6
-        pxor       %xmm6, %xmm7
-	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 7
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.endif
-	cmp	   $64, %r13
-	jl	_initial_blocks_done\num_initial_blocks\operation
-	# no need for precomputed values
-/*
-*
-* Precomputations for HashKey parallel with encryption of first 4 blocks.
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
-*/
-	MOVADQ	   ONE(%rip), \TMP1
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM1
-	PSHUFB_XMM  %xmm14, \XMM1        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM2
-	PSHUFB_XMM  %xmm14, \XMM2        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM3
-	PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM4
-	PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
-
-	MOVADQ	   0(%arg1),\TMP1
-	pxor	   \TMP1, \XMM1
-	pxor	   \TMP1, \XMM2
-	pxor	   \TMP1, \XMM3
-	pxor	   \TMP1, \XMM4
-	movdqa	   \TMP3, \TMP5
-	pshufd	   $78, \TMP3, \TMP1
-	pxor	   \TMP3, \TMP1
-	movdqa	   \TMP1, HashKey_k(%rsp)
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_2_k(%rsp)
-.irpc index, 1234 # do 4 rounds
-	movaps 0x10*\index(%arg1), \TMP1
-	AESENC	   \TMP1, \XMM1
-	AESENC	   \TMP1, \XMM2
-	AESENC	   \TMP1, \XMM3
-	AESENC	   \TMP1, \XMM4
-.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_3(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_3_k(%rsp)
-.irpc index, 56789 # do next 5 rounds
-	movaps 0x10*\index(%arg1), \TMP1
-	AESENC	   \TMP1, \XMM1
-	AESENC	   \TMP1, \XMM2
-	AESENC	   \TMP1, \XMM3
-	AESENC	   \TMP1, \XMM4
-.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_4(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_4_k(%rsp)
-	lea	   0xa0(%arg1),%r10
-	mov	   keysize,%eax
-	shr	   $2,%eax			# 128->4, 192->6, 256->8
-	sub	   $4,%eax			# 128->0, 192->2, 256->4
-	jz	   aes_loop_pre_dec_done\num_initial_blocks
-
-aes_loop_pre_dec\num_initial_blocks:
-	MOVADQ	   (%r10),\TMP2
-.irpc	index, 1234
-	AESENC	   \TMP2, %xmm\index
-.endr
-	add	   $16,%r10
-	sub	   $1,%eax
-	jnz	   aes_loop_pre_dec\num_initial_blocks
+	cmp	   $16, %r11
+	jge	   _get_AAD_blocks\@
 
-aes_loop_pre_dec_done\num_initial_blocks:
-	MOVADQ	   (%r10), \TMP2
-	AESENCLAST \TMP2, \XMM1
-	AESENCLAST \TMP2, \XMM2
-	AESENCLAST \TMP2, \XMM3
-	AESENCLAST \TMP2, \XMM4
-	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM1
-	movdqu	   \XMM1, 16*0(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM1
-	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM2
-	movdqu	   \XMM2, 16*1(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM2
-	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM3
-	movdqu	   \XMM3, 16*2(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM3
-	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM4
-	movdqu	   \XMM4, 16*3(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM4
-	add	   $64, %r11
-	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
-	pxor	   \XMMDst, \XMM1
-# combine GHASHed value with the corresponding ciphertext
-	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+	movdqu	   \TMP6, \TMP7
+
+	/* read the last <16B of AAD */
+_get_AAD_rest\@:
+	cmp	   $0, %r11
+	je	   _get_AAD_done\@
 
-_initial_blocks_done\num_initial_blocks\operation:
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
+	PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+	pxor	   \TMP6, \TMP7
+	GHASH_MUL  \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
+	movdqu \TMP7, \TMP6
 
+_get_AAD_done\@:
+	movdqu \TMP6, AadHash(%arg2)
 .endm
 
+# PARTIAL_BLOCK: Handles the encryption/decryption and tag for partial blocks
+# between update calls.
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_context_data
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+	AAD_HASH operation
+	mov 	PBlockLen(%arg2), %r13
+	cmp	$0, %r13
+	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+	# Read in input data without over reading
+	cmp	$16, \PLAIN_CYPH_LEN
+	jl	_fewer_than_16_bytes_\@
+	movups	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
+	jmp	_data_read_\@
+
+_fewer_than_16_bytes_\@:
+	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+	mov	\PLAIN_CYPH_LEN, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
+
+	mov PBlockLen(%arg2), %r13
+
+_data_read_\@:				# Finished reading in data
+
+	movdqu	PBlockEncKey(%arg2), %xmm9
+	movdqu	HashKey(%arg2), %xmm13
+
+	lea	SHIFT_MASK(%rip), %r12
+
+	# adjust the shuffle mask pointer to be able to shift r13 bytes
+	# (r13 is the number of bytes already in the partial block)
+	add	%r13, %r12
+	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
+	PSHUFB_XMM %xmm2, %xmm9		# shift right r13 bytes
+
+.ifc \operation, dec
+	movdqa	%xmm1, %xmm3
+	pxor	%xmm1, %xmm9		# Cyphertext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+	sub	$16, %r10
+	# Determine if the partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_1_\@
+	sub	%r10, %r12
+_no_extra_mask_1_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9
+
+	pand	%xmm1, %xmm3
+	movdqa	SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM	%xmm10, %xmm3
+	PSHUFB_XMM	%xmm2, %xmm3
+	pxor	%xmm3, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_1_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax,%rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_dec_done_\@
+_partial_incomplete_1_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_dec_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+.else
+	pxor	%xmm1, %xmm9			# Plaintext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+	sub	$16, %r10
+	# Determine if the partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_2_\@
+	sub	%r10, %r12
+_no_extra_mask_2_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9
+
+	movdqa	SHUF_MASK(%rip), %xmm1
+	PSHUFB_XMM %xmm1, %xmm9
+	PSHUFB_XMM %xmm2, %xmm9
+	pxor	%xmm9, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_2_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax,%rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_encode_done_\@
+_partial_incomplete_2_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_encode_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+
+	movdqa	SHUF_MASK(%rip), %xmm10
+	# shuffle xmm9 back to output as ciphertext
+	PSHUFB_XMM	%xmm10, %xmm9
+	PSHUFB_XMM	%xmm2, %xmm9
+.endif
+	# output encrypted Bytes
+	cmp	$0, %r10
+	jl	_partial_fill_\@
+	mov	%r13, %r12
+	mov	$16, %r13
+	# Set r13 to be the number of bytes to write out
+	sub	%r12, %r13
+	jmp	_count_set_\@
+_partial_fill_\@:
+	mov	\PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+	movdqa	%xmm9, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	cmp	$8, %r13
+	jle	_less_than_8_bytes_left_\@
+
+	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$8, \DATA_OFFSET
+	psrldq	$8, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	sub	$8, %r13
+_less_than_8_bytes_left_\@:
+	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$1, \DATA_OFFSET
+	shr	$8, %rax
+	sub	$1, %r13
+	jne	_less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
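At the C level the same bookkeeping is the familiar streaming-cipher pattern: bytes left over from the previous update call are buffered until a full 16-byte block is available, and only then hashed and encrypted. Below is a minimal sketch of just the buffering step, with hypothetical names (the real state lives in gcm_context_data and is manipulated by the macro above, which also XORs the buffered bytes with E(K, Yn)):

	#include <string.h>

	/* Sketch only -- illustrative names, not the kernel's API.  partial[]
	 * and partial_len stand in for the PBlockEncKey/PBlockLen state that
	 * the macro keeps in gcm_context_data between update calls. */
	struct gcm_partial_sketch {
		unsigned char partial[16];
		size_t partial_len;
	};

	/* Buffer up to (16 - partial_len) bytes of new input and report how
	 * many were consumed; once partial_len reaches 16 the caller would
	 * GHASH and encrypt the completed block and reset partial_len, as
	 * PARTIAL_BLOCK does. */
	static size_t gcm_fill_partial(struct gcm_partial_sketch *st,
				       const unsigned char *in, size_t len)
	{
		size_t need = 16 - st->partial_len;
		size_t take = len < need ? len : need;

		memcpy(st->partial + st->partial_len, in, take);
		st->partial_len += take;
		return take;	/* caller advances its input pointer by this much */
	}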
 
 /*
 * if a = number of total plaintext bytes
@@ -499,49 +789,19 @@ _initial_blocks_done\num_initial_blocks\operation:
 * the ciphertext
 * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
 * are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+* arg1, %arg2, %arg3 are used as pointers only, not modified
 */
 
 
-.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
-        MOVADQ     SHUF_MASK(%rip), %xmm14
-	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r11           # %r11 = aadLen
-	pxor	   %xmm\i, %xmm\i
-	pxor	   \XMM2, \XMM2
-
-	cmp	   $16, %r11
-	jl	   _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
-	movdqu	   (%r10), %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   %xmm\i, \XMM2
-	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-	add	   $16, %r10
-	sub	   $16, %r11
-	cmp	   $16, %r11
-	jge	   _get_AAD_blocks\num_initial_blocks\operation
-
-	movdqu	   \XMM2, %xmm\i
-
-	/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	je	   _get_AAD_done\num_initial_blocks\operation
+.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	MOVADQ		SHUF_MASK(%rip), %xmm14
 
-	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   \XMM2, %xmm\i
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	movdqu AadHash(%arg2), %xmm\i		    # XMM0 = Y0
 
-_get_AAD_done\num_initial_blocks\operation:
-	xor	   %r11, %r11 # initialise the data pointer offset as zero
 	# start AES for num_initial_blocks blocks
 
-	mov	   %arg5, %rax                      # %rax = *Y0
-	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
-	PSHUFB_XMM   %xmm14, \XMM0
+	movdqu CurCount(%arg2), \XMM0                # XMM0 = Y0
 
 .if (\i == 5) || (\i == 6) || (\i == 7)
 
@@ -549,7 +809,11 @@ _get_AAD_done\num_initial_blocks\operation:
 	MOVADQ		0(%arg1),\TMP2
 .irpc index, \i_seq
 	paddd		\TMP1, \XMM0                 # INCR Y0
+.ifc \operation, dec
+        movdqa     \XMM0, %xmm\index
+.else
 	MOVADQ		\XMM0, %xmm\index
+.endif
 	PSHUFB_XMM	%xmm14, %xmm\index      # perform a 16 byte swap
 	pxor		\TMP2, %xmm\index
 .endr
@@ -558,25 +822,29 @@ _get_AAD_done\num_initial_blocks\operation:
 	shr	$2,%eax				# 128->4, 192->6, 256->8
 	add	$5,%eax			      # 128->9, 192->11, 256->13
 
-aes_loop_initial_enc\num_initial_blocks:
+aes_loop_initial_\@:
 	MOVADQ	(%r10),\TMP1
 .irpc	index, \i_seq
 	AESENC	\TMP1, %xmm\index
 .endr
 	add	$16,%r10
 	sub	$1,%eax
-	jnz	aes_loop_initial_enc\num_initial_blocks
+	jnz	aes_loop_initial_\@
 
 	MOVADQ	(%r10), \TMP1
 .irpc index, \i_seq
 	AESENCLAST \TMP1, %xmm\index         # Last Round
 .endr
 .irpc index, \i_seq
-	movdqu	   (%arg3 , %r11, 1), \TMP1
+	movdqu	   (%arg4 , %r11, 1), \TMP1
 	pxor	   \TMP1, %xmm\index
-	movdqu	   %xmm\index, (%arg2 , %r11, 1)
+	movdqu	   %xmm\index, (%arg3 , %r11, 1)
 	# write back plaintext/ciphertext for num_initial_blocks
 	add	   $16, %r11
+
+.ifc \operation, dec
+	movdqa     \TMP1, %xmm\index
+.endif
 	PSHUFB_XMM	   %xmm14, %xmm\index
 
 		# prepare plaintext/ciphertext for GHASH computation
@@ -602,7 +870,7 @@ aes_loop_initial_enc\num_initial_blocks:
 	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 .endif
 	cmp	   $64, %r13
-	jl	_initial_blocks_done\num_initial_blocks\operation
+	jl	_initial_blocks_done\@
 	# no need for precomputed values
 /*
 *
@@ -631,17 +899,6 @@ aes_loop_initial_enc\num_initial_blocks:
 	pxor	   \TMP1, \XMM2
 	pxor	   \TMP1, \XMM3
 	pxor	   \TMP1, \XMM4
-	movdqa	   \TMP3, \TMP5
-	pshufd	   $78, \TMP3, \TMP1
-	pxor	   \TMP3, \TMP1
-	movdqa	   \TMP1, HashKey_k(%rsp)
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_2_k(%rsp)
 .irpc index, 1234 # do 4 rounds
 	movaps 0x10*\index(%arg1), \TMP1
 	AESENC	   \TMP1, \XMM1
@@ -649,12 +906,6 @@ aes_loop_initial_enc\num_initial_blocks:
 	AESENC	   \TMP1, \XMM3
 	AESENC	   \TMP1, \XMM4
 .endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_3(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_3_k(%rsp)
 .irpc index, 56789 # do next 5 rounds
 	movaps 0x10*\index(%arg1), \TMP1
 	AESENC	   \TMP1, \XMM1
@@ -662,45 +913,56 @@ aes_loop_initial_enc\num_initial_blocks:
 	AESENC	   \TMP1, \XMM3
 	AESENC	   \TMP1, \XMM4
 .endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_4(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_4_k(%rsp)
 	lea	   0xa0(%arg1),%r10
 	mov	   keysize,%eax
 	shr	   $2,%eax			# 128->4, 192->6, 256->8
 	sub	   $4,%eax			# 128->0, 192->2, 256->4
-	jz	   aes_loop_pre_enc_done\num_initial_blocks
+	jz	   aes_loop_pre_done\@
 
-aes_loop_pre_enc\num_initial_blocks:
+aes_loop_pre_\@:
 	MOVADQ	   (%r10),\TMP2
 .irpc	index, 1234
 	AESENC	   \TMP2, %xmm\index
 .endr
 	add	   $16,%r10
 	sub	   $1,%eax
-	jnz	   aes_loop_pre_enc\num_initial_blocks
+	jnz	   aes_loop_pre_\@
 
-aes_loop_pre_enc_done\num_initial_blocks:
+aes_loop_pre_done\@:
 	MOVADQ	   (%r10), \TMP2
 	AESENCLAST \TMP2, \XMM1
 	AESENCLAST \TMP2, \XMM2
 	AESENCLAST \TMP2, \XMM3
 	AESENCLAST \TMP2, \XMM4
-	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
+	movdqu	   16*0(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM1
-	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM1
+.endif
+	movdqu	   16*1(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM2
-	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM2
+.endif
+	movdqu	   16*2(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM3
-	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM3
+.endif
+	movdqu	   16*3(%arg4 , %r11 , 1), \TMP1
 	pxor	   \TMP1, \XMM4
-	movdqu     \XMM1, 16*0(%arg2 , %r11 , 1)
-	movdqu     \XMM2, 16*1(%arg2 , %r11 , 1)
-	movdqu     \XMM3, 16*2(%arg2 , %r11 , 1)
-	movdqu     \XMM4, 16*3(%arg2 , %r11 , 1)
+.ifc \operation, dec
+	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM4
+.else
+	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
+	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
+	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
+	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
+.endif
 
 	add	   $64, %r11
 	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
@@ -710,14 +972,14 @@ aes_loop_pre_enc_done\num_initial_blocks:
 	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
 	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
 
-_initial_blocks_done\num_initial_blocks\operation:
+_initial_blocks_done\@:
 
 .endm
 
 /*
 * encrypt 4 blocks at a time
 * ghash the 4 previously encrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
 .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -735,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pshufd	  $78, \XMM5, \TMP6
 	pxor	  \XMM5, \TMP6
 	paddd     ONE(%rip), \XMM0		# INCR CNT
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
 	movdqa    \XMM0, \XMM1
 	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -754,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pxor	  (%arg1), \XMM2
 	pxor	  (%arg1), \XMM3
 	pxor	  (%arg1), \XMM4
-	movdqa	  HashKey_4_k(%rsp), \TMP5
+	movdqa	  HashKey_4_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
 	movaps 0x10(%arg1), \TMP1
 	AESENC	  \TMP1, \XMM1              # Round 1
@@ -769,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM6, \TMP1
 	pshufd	  $78, \XMM6, \TMP2
 	pxor	  \XMM6, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
 	movaps 0x30(%arg1), \TMP3
 	AESENC    \TMP3, \XMM1              # Round 3
@@ -782,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_3_k(%rsp), \TMP5
+	movdqa	  HashKey_3_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x50(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1              # Round 5
@@ -796,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM7, \TMP1
 	pshufd	  $78, \XMM7, \TMP2
 	pxor	  \XMM7, \TMP2
-	movdqa	  HashKey_2(%rsp ), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 
         # Multiply TMP5 * HashKey using karatsuba
 
@@ -812,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_2_k(%rsp), \TMP5
+	movdqa	  HashKey_2_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x80(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1             # Round 8
@@ -830,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM8, \TMP1
 	pshufd	  $78, \XMM8, \TMP2
 	pxor	  \XMM8, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
 	movaps 0x90(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1            # Round 9
@@ -842,37 +1104,37 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	mov	  keysize,%eax
 	shr	  $2,%eax			# 128->4, 192->6, 256->8
 	sub	  $4,%eax			# 128->0, 192->2, 256->4
-	jz	  aes_loop_par_enc_done
+	jz	  aes_loop_par_enc_done\@
 
-aes_loop_par_enc:
+aes_loop_par_enc\@:
 	MOVADQ	  (%r10),\TMP3
 .irpc	index, 1234
 	AESENC	  \TMP3, %xmm\index
 .endr
 	add	  $16,%r10
 	sub	  $1,%eax
-	jnz	  aes_loop_par_enc
+	jnz	  aes_loop_par_enc\@
 
-aes_loop_par_enc_done:
+aes_loop_par_enc_done\@:
 	MOVADQ	  (%r10), \TMP3
 	AESENCLAST \TMP3, \XMM1           # Round 10
 	AESENCLAST \TMP3, \XMM2
 	AESENCLAST \TMP3, \XMM3
 	AESENCLAST \TMP3, \XMM4
-	movdqa    HashKey_k(%rsp), \TMP5
+	movdqa    HashKey_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
-	movdqu	  (%arg3,%r11,1), \TMP3
+	movdqu	  (%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
-	movdqu	  16(%arg3,%r11,1), \TMP3
+	movdqu	  16(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
-	movdqu	  32(%arg3,%r11,1), \TMP3
+	movdqu	  32(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
-	movdqu	  48(%arg3,%r11,1), \TMP3
+	movdqu	  48(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
-        movdqu    \XMM1, (%arg2,%r11,1)        # Write to the ciphertext buffer
-        movdqu    \XMM2, 16(%arg2,%r11,1)      # Write to the ciphertext buffer
-        movdqu    \XMM3, 32(%arg2,%r11,1)      # Write to the ciphertext buffer
-        movdqu    \XMM4, 48(%arg2,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM1, (%arg3,%r11,1)        # Write to the ciphertext buffer
+        movdqu    \XMM2, 16(%arg3,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM3, 32(%arg3,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to the ciphertext buffer
 	PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM2	# perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM3	# perform a 16 byte swap
@@ -925,7 +1187,7 @@ aes_loop_par_enc_done:
 /*
 * decrypt 4 blocks at a time
 * ghash the 4 previously decrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
 * %r11 is the data offset value
 */
 .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -943,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pshufd	  $78, \XMM5, \TMP6
 	pxor	  \XMM5, \TMP6
 	paddd     ONE(%rip), \XMM0		# INCR CNT
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
 	movdqa    \XMM0, \XMM1
 	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -962,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	pxor	  (%arg1), \XMM2
 	pxor	  (%arg1), \XMM3
 	pxor	  (%arg1), \XMM4
-	movdqa	  HashKey_4_k(%rsp), \TMP5
+	movdqa	  HashKey_4_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
 	movaps 0x10(%arg1), \TMP1
 	AESENC	  \TMP1, \XMM1              # Round 1
@@ -977,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM6, \TMP1
 	pshufd	  $78, \XMM6, \TMP2
 	pxor	  \XMM6, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
 	movaps 0x30(%arg1), \TMP3
 	AESENC    \TMP3, \XMM1              # Round 3
@@ -990,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_3_k(%rsp), \TMP5
+	movdqa	  HashKey_3_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x50(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1              # Round 5
@@ -1004,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM7, \TMP1
 	pshufd	  $78, \XMM7, \TMP2
 	pxor	  \XMM7, \TMP2
-	movdqa	  HashKey_2(%rsp ), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 
         # Multiply TMP5 * HashKey using karatsuba
 
@@ -1020,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	AESENC	  \TMP3, \XMM2
 	AESENC	  \TMP3, \XMM3
 	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_2_k(%rsp), \TMP5
+	movdqa	  HashKey_2_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
 	movaps 0x80(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1             # Round 8
@@ -1038,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	movdqa	  \XMM8, \TMP1
 	pshufd	  $78, \XMM8, \TMP2
 	pxor	  \XMM8, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
 	movaps 0x90(%arg1), \TMP3
 	AESENC	  \TMP3, \XMM1            # Round 9
@@ -1050,40 +1312,40 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
 	mov	  keysize,%eax
 	shr	  $2,%eax		        # 128->4, 192->6, 256->8
 	sub	  $4,%eax			# 128->0, 192->2, 256->4
-	jz	  aes_loop_par_dec_done
+	jz	  aes_loop_par_dec_done\@
 
-aes_loop_par_dec:
+aes_loop_par_dec\@:
 	MOVADQ	  (%r10),\TMP3
 .irpc	index, 1234
 	AESENC	  \TMP3, %xmm\index
 .endr
 	add	  $16,%r10
 	sub	  $1,%eax
-	jnz	  aes_loop_par_dec
+	jnz	  aes_loop_par_dec\@
 
-aes_loop_par_dec_done:
+aes_loop_par_dec_done\@:
 	MOVADQ	  (%r10), \TMP3
 	AESENCLAST \TMP3, \XMM1           # last round
 	AESENCLAST \TMP3, \XMM2
 	AESENCLAST \TMP3, \XMM3
 	AESENCLAST \TMP3, \XMM4
-	movdqa    HashKey_k(%rsp), \TMP5
+	movdqa    HashKey_k(%arg2), \TMP5
 	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
-	movdqu	  (%arg3,%r11,1), \TMP3
+	movdqu	  (%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM1, (%arg2,%r11,1)        # Write to plaintext buffer
+	movdqu	  \XMM1, (%arg3,%r11,1)        # Write to plaintext buffer
 	movdqa    \TMP3, \XMM1
-	movdqu	  16(%arg3,%r11,1), \TMP3
+	movdqu	  16(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM2, 16(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM2, 16(%arg3,%r11,1)      # Write to plaintext buffer
 	movdqa    \TMP3, \XMM2
-	movdqu	  32(%arg3,%r11,1), \TMP3
+	movdqu	  32(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM3, 32(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM3, 32(%arg3,%r11,1)      # Write to plaintext buffer
 	movdqa    \TMP3, \XMM3
-	movdqu	  48(%arg3,%r11,1), \TMP3
+	movdqu	  48(%arg4,%r11,1), \TMP3
 	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM4, 48(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM4, 48(%arg3,%r11,1)      # Write to plaintext buffer
 	movdqa    \TMP3, \XMM4
 	PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
 	PSHUFB_XMM %xmm15, \XMM2	# perform a 16 byte swap
@@ -1143,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM1, \TMP6
 	pshufd	  $78, \XMM1, \TMP2
 	pxor	  \XMM1, \TMP2
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
-	movdqa	  HashKey_4_k(%rsp), \TMP4
+	movdqa	  HashKey_4_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	movdqa	  \XMM1, \XMMDst
 	movdqa	  \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
@@ -1156,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM2, \TMP1
 	pshufd	  $78, \XMM2, \TMP2
 	pxor	  \XMM2, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
-	movdqa	  HashKey_3_k(%rsp), \TMP4
+	movdqa	  HashKey_3_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	pxor	  \TMP1, \TMP6
 	pxor	  \XMM2, \XMMDst
@@ -1171,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM3, \TMP1
 	pshufd	  $78, \XMM3, \TMP2
 	pxor	  \XMM3, \TMP2
-	movdqa	  HashKey_2(%rsp), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
-	movdqa	  HashKey_2_k(%rsp), \TMP4
+	movdqa	  HashKey_2_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	pxor	  \TMP1, \TMP6
 	pxor	  \XMM3, \XMMDst
@@ -1184,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 	movdqa	  \XMM4, \TMP1
 	pshufd	  $78, \XMM4, \TMP2
 	pxor	  \XMM4, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
 	PCLMULQDQ 0x11, \TMP5, \TMP1	    # TMP1 = a1*b1
 	PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
-	movdqa	  HashKey_k(%rsp), \TMP4
+	movdqa	  HashKey_k(%arg2), \TMP4
 	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
 	pxor	  \TMP1, \TMP6
 	pxor	  \XMM4, \XMMDst
@@ -1256,6 +1518,8 @@ _esb_loop_\@:
 .endm
 /*****************************************************************************
 * void aesni_gcm_dec(void *aes_ctx,    // AES Key schedule. Starts on a 16 byte boundary.
+*                   struct gcm_context_data *data
+*                                      // Context data
 *                   u8 *out,           // Plaintext output. Encrypt in-place is allowed.
 *                   const u8 *in,      // Ciphertext input
 *                   u64 plaintext_len, // Length of data in bytes for decryption.
@@ -1333,195 +1597,20 @@ _esb_loop_\@:
 *
 *****************************************************************************/
 ENTRY(aesni_gcm_dec)
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	%rsp, %r14
-/*
-* states of %xmm registers %xmm6:%xmm15 not saved
-* all %xmm registers are clobbered
-*/
-	sub	$VARIABLE_OFFSET, %rsp
-	and	$~63, %rsp                        # align rsp to 64 bytes
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13			  # %xmm13 = HashKey
-        movdqa  SHUF_MASK(%rip), %xmm2
-	PSHUFB_XMM %xmm2, %xmm13
-
-
-# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
-
-	movdqa	%xmm13, %xmm2
-	psllq	$1, %xmm13
-	psrlq	$63, %xmm2
-	movdqa	%xmm2, %xmm1
-	pslldq	$8, %xmm2
-	psrldq	$8, %xmm1
-	por	%xmm2, %xmm13
-
-        # Reduction
-
-	pshufd	$0x24, %xmm1, %xmm2
-	pcmpeqd TWOONE(%rip), %xmm2
-	pand	POLY(%rip), %xmm2
-	pxor	%xmm2, %xmm13     # %xmm13 holds the HashKey<<1 (mod poly)
-
-
-        # Decrypt first few blocks
-
-	movdqa %xmm13, HashKey(%rsp)           # store HashKey<<1 (mod poly)
-	mov %arg4, %r13    # save the number of bytes of plaintext/ciphertext
-	and $-16, %r13                      # %r13 = %r13 - (%r13 mod 16)
-	mov %r13, %r12
-	and $(3<<4), %r12
-	jz _initial_num_blocks_is_0_decrypt
-	cmp $(2<<4), %r12
-	jb _initial_num_blocks_is_1_decrypt
-	je _initial_num_blocks_is_2_decrypt
-_initial_num_blocks_is_3_decrypt:
-	INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
-	sub	$48, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_2_decrypt:
-	INITIAL_BLOCKS_DEC	2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
-	sub	$32, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_1_decrypt:
-	INITIAL_BLOCKS_DEC	1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
-	sub	$16, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_0_decrypt:
-	INITIAL_BLOCKS_DEC	0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
-_initial_blocks_decrypted:
-	cmp	$0, %r13
-	je	_zero_cipher_left_decrypt
-	sub	$64, %r13
-	je	_four_cipher_left_decrypt
-_decrypt_by_4:
-	GHASH_4_ENCRYPT_4_PARALLEL_DEC	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
-	add	$64, %r11
-	sub	$64, %r13
-	jne	_decrypt_by_4
-_four_cipher_left_decrypt:
-	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_decrypt:
-	mov	%arg4, %r13
-	and	$15, %r13				# %r13 = arg4 (mod 16)
-	je	_multiple_of_16_bytes_decrypt
-
-        # Handle the last <16 byte block separately
+	FUNC_SAVE
 
-	paddd ONE(%rip), %xmm0         # increment CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
-
-	ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1    # E(K, Yn)
-
-	lea (%arg3,%r11,1), %r10
-	mov %r13, %r12
-	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-	lea ALL_F+16(%rip), %r12
-	sub %r13, %r12
-	movdqa  %xmm1, %xmm2
-	pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-	movdqu (%r12), %xmm1
-	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
-	pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
-	pand    %xmm1, %xmm2
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10 ,%xmm2
-
-	pxor %xmm2, %xmm8
-	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-
-        # output %r13 bytes
-	MOVQ_R64_XMM	%xmm0, %rax
-	cmp	$8, %r13
-	jle	_less_than_8_bytes_left_decrypt
-	mov	%rax, (%arg2 , %r11, 1)
-	add	$8, %r11
-	psrldq	$8, %xmm0
-	MOVQ_R64_XMM	%xmm0, %rax
-	sub	$8, %r13
-_less_than_8_bytes_left_decrypt:
-	mov	%al,  (%arg2, %r11, 1)
-	add	$1, %r11
-	shr	$8, %rax
-	sub	$1, %r13
-	jne	_less_than_8_bytes_left_decrypt
-_multiple_of_16_bytes_decrypt:
-	mov	arg8, %r12		  # %r13 = aadLen (number of bytes)
-	shl	$3, %r12		  # convert into number of bits
-	movd	%r12d, %xmm15		  # len(A) in %xmm15
-	shl	$3, %arg4		  # len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
-	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
-	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
-	pxor	%xmm15, %xmm8
-	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	         # final GHASH computation
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm8
-
-	mov	%arg5, %rax		  # %rax = *Y0
-	movdqu	(%rax), %xmm0		  # %xmm0 = Y0
-	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
-	pxor	%xmm8, %xmm0
-_return_T_decrypt:
-	mov	arg9, %r10                # %r10 = authTag
-	mov	arg10, %r11               # %r11 = auth_tag_len
-	cmp	$16, %r11
-	je	_T_16_decrypt
-	cmp	$8, %r11
-	jl	_T_4_decrypt
-_T_8_decrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
-	add	$8, %r10
-	sub	$8, %r11
-	psrldq	$8, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_decrypt
-_T_4_decrypt:
-	movd	%xmm0, %eax
-	mov	%eax, (%r10)
-	add	$4, %r10
-	sub	$4, %r11
-	psrldq	$4, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_decrypt
-_T_123_decrypt:
-	movd	%xmm0, %eax
-	cmp	$2, %r11
-	jl	_T_1_decrypt
-	mov	%ax, (%r10)
-	cmp	$2, %r11
-	je	_return_T_done_decrypt
-	add	$2, %r10
-	sar	$16, %eax
-_T_1_decrypt:
-	mov	%al, (%r10)
-	jmp	_return_T_done_decrypt
-_T_16_decrypt:
-	movdqu	%xmm0, (%r10)
-_return_T_done_decrypt:
-	mov	%r14, %rsp
-	pop	%r14
-	pop	%r13
-	pop	%r12
+	GCM_INIT %arg6, arg7, arg8, arg9
+	GCM_ENC_DEC dec
+	GCM_COMPLETE arg10, arg11
+	FUNC_RESTORE
 	ret
 ENDPROC(aesni_gcm_dec)
 
 
 /*****************************************************************************
 * void aesni_gcm_enc(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data
+*                                        // Context data
 *                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
 *                    const u8 *in,       // Plaintext input
 *                    u64 plaintext_len,  // Length of data in bytes for encryption.
@@ -1596,195 +1685,78 @@ ENDPROC(aesni_gcm_dec)
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
 ENTRY(aesni_gcm_enc)
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	%rsp, %r14
-#
-# states of %xmm registers %xmm6:%xmm15 not saved
-# all %xmm registers are clobbered
-#
-	sub	$VARIABLE_OFFSET, %rsp
-	and	$~63, %rsp
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13
-        movdqa  SHUF_MASK(%rip), %xmm2
-	PSHUFB_XMM %xmm2, %xmm13
-
-
-# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
-
-	movdqa	%xmm13, %xmm2
-	psllq	$1, %xmm13
-	psrlq	$63, %xmm2
-	movdqa	%xmm2, %xmm1
-	pslldq	$8, %xmm2
-	psrldq	$8, %xmm1
-	por	%xmm2, %xmm13
-
-        # reduce HashKey<<1
-
-	pshufd	$0x24, %xmm1, %xmm2
-	pcmpeqd TWOONE(%rip), %xmm2
-	pand	POLY(%rip), %xmm2
-	pxor	%xmm2, %xmm13
-	movdqa	%xmm13, HashKey(%rsp)
-	mov	%arg4, %r13            # %xmm13 holds HashKey<<1 (mod poly)
-	and	$-16, %r13
-	mov	%r13, %r12
-
-        # Encrypt first few blocks
-
-	and	$(3<<4), %r12
-	jz	_initial_num_blocks_is_0_encrypt
-	cmp	$(2<<4), %r12
-	jb	_initial_num_blocks_is_1_encrypt
-	je	_initial_num_blocks_is_2_encrypt
-_initial_num_blocks_is_3_encrypt:
-	INITIAL_BLOCKS_ENC	3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
-	sub	$48, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_2_encrypt:
-	INITIAL_BLOCKS_ENC	2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
-	sub	$32, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_1_encrypt:
-	INITIAL_BLOCKS_ENC	1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
-	sub	$16, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_0_encrypt:
-	INITIAL_BLOCKS_ENC	0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
-_initial_blocks_encrypted:
-
-        # Main loop - Encrypt remaining blocks
-
-	cmp	$0, %r13
-	je	_zero_cipher_left_encrypt
-	sub	$64, %r13
-	je	_four_cipher_left_encrypt
-_encrypt_by_4_encrypt:
-	GHASH_4_ENCRYPT_4_PARALLEL_ENC	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
-	add	$64, %r11
-	sub	$64, %r13
-	jne	_encrypt_by_4_encrypt
-_four_cipher_left_encrypt:
-	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_encrypt:
-	mov	%arg4, %r13
-	and	$15, %r13			# %r13 = arg4 (mod 16)
-	je	_multiple_of_16_bytes_encrypt
+	FUNC_SAVE
 
-         # Handle the last <16 Byte block separately
-	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
-
-	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
+	GCM_INIT %arg6, arg7, arg8, arg9
+	GCM_ENC_DEC enc
 
-	lea (%arg3,%r11,1), %r10
-	mov %r13, %r12
-	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-	lea ALL_F+16(%rip), %r12
-	sub %r13, %r12
-	pxor	%xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-	movdqu	(%r12), %xmm1
-	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
-	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10,%xmm0
+	GCM_COMPLETE arg10, arg11
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_enc)
 
-	pxor	%xmm0, %xmm8
-	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# GHASH computation for the last <16 byte block
-	movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
+/*****************************************************************************
+* void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                     struct gcm_context_data *data,
+*                                         // context data
+*                     u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
+*                                         // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+*                                         // concatenated with 0x00000001. 16-byte aligned pointer.
+*                     u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
+*                     const u8 *aad,      // Additional Authentication Data (AAD)
+*                     u64 aad_len)        // Length of AAD in bytes.
+*/
+ENTRY(aesni_gcm_init)
+	FUNC_SAVE
+	GCM_INIT %arg3, %arg4, %arg5, %arg6
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_init)
 
-	# shuffle xmm0 back to output as ciphertext
+/*****************************************************************************
+* void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
+*                    const u8 *in,       // Plaintext input
+*                    u64 plaintext_len,  // Length of data in bytes for encryption.
+*/
+ENTRY(aesni_gcm_enc_update)
+	FUNC_SAVE
+	GCM_ENC_DEC enc
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_enc_update)
 
-        # Output %r13 bytes
-	MOVQ_R64_XMM %xmm0, %rax
-	cmp $8, %r13
-	jle _less_than_8_bytes_left_encrypt
-	mov %rax, (%arg2 , %r11, 1)
-	add $8, %r11
-	psrldq $8, %xmm0
-	MOVQ_R64_XMM %xmm0, %rax
-	sub $8, %r13
-_less_than_8_bytes_left_encrypt:
-	mov %al,  (%arg2, %r11, 1)
-	add $1, %r11
-	shr $8, %rax
-	sub $1, %r13
-	jne _less_than_8_bytes_left_encrypt
-_multiple_of_16_bytes_encrypt:
-	mov	arg8, %r12    # %r12 = addLen (number of bytes)
-	shl	$3, %r12
-	movd	%r12d, %xmm15       # len(A) in %xmm15
-	shl	$3, %arg4               # len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
-	pslldq	$8, %xmm15          # %xmm15 = len(A)||0x0000000000000000
-	pxor	%xmm1, %xmm15       # %xmm15 = len(A)||len(C)
-	pxor	%xmm15, %xmm8
-	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# final GHASH computation
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm8         # perform a 16 byte swap
+/*****************************************************************************
+* void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *out,            // Plaintext output. Decrypt in-place is allowed.
+*                    const u8 *in,       // Ciphertext input
+*                    u64 ciphertext_len, // Length of data in bytes for decryption.
+*/
+ENTRY(aesni_gcm_dec_update)
+	FUNC_SAVE
+	GCM_ENC_DEC dec
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_dec_update)
 
-	mov	%arg5, %rax		       # %rax  = *Y0
-	movdqu	(%rax), %xmm0		       # %xmm0 = Y0
-	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm15         # Encrypt(K, Y0)
-	pxor	%xmm8, %xmm0
-_return_T_encrypt:
-	mov	arg9, %r10                     # %r10 = authTag
-	mov	arg10, %r11                    # %r11 = auth_tag_len
-	cmp	$16, %r11
-	je	_T_16_encrypt
-	cmp	$8, %r11
-	jl	_T_4_encrypt
-_T_8_encrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
-	add	$8, %r10
-	sub	$8, %r11
-	psrldq	$8, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_encrypt
-_T_4_encrypt:
-	movd	%xmm0, %eax
-	mov	%eax, (%r10)
-	add	$4, %r10
-	sub	$4, %r11
-	psrldq	$4, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_encrypt
-_T_123_encrypt:
-	movd	%xmm0, %eax
-	cmp	$2, %r11
-	jl	_T_1_encrypt
-	mov	%ax, (%r10)
-	cmp	$2, %r11
-	je	_return_T_done_encrypt
-	add	$2, %r10
-	sar	$16, %eax
-_T_1_encrypt:
-	mov	%al, (%r10)
-	jmp	_return_T_done_encrypt
-_T_16_encrypt:
-	movdqu	%xmm0, (%r10)
-_return_T_done_encrypt:
-	mov	%r14, %rsp
-	pop	%r14
-	pop	%r13
-	pop	%r12
+/*****************************************************************************
+* void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *auth_tag,       // Authenticated Tag output.
+*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
+*                                        // 12 or 8.
+*/
+ENTRY(aesni_gcm_finalize)
+	FUNC_SAVE
+	GCM_COMPLETE %arg3 %arg4
+	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_enc)
+ENDPROC(aesni_gcm_finalize)
 
 #endif
 

+ 206 - 24
arch/x86/crypto/aesni-intel_glue.c

@@ -72,6 +72,21 @@ struct aesni_xts_ctx {
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 };
 
+#define GCM_BLOCK_LEN 16
+
+struct gcm_context_data {
+	/* init, update and finalize context data */
+	u8 aad_hash[GCM_BLOCK_LEN];
+	u64 aad_length;
+	u64 in_length;
+	u8 partial_block_enc_key[GCM_BLOCK_LEN];
+	u8 orig_IV[GCM_BLOCK_LEN];
+	u8 current_counter[GCM_BLOCK_LEN];
+	u64 partial_block_len;
+	u64 unused;
+	u8 hash_keys[GCM_BLOCK_LEN * 8];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
 
 /* asmlinkage void aesni_gcm_enc()
  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data.  May be uninitialized.
  * u8 *out, Ciphertext output. Encrypt in-place is allowed.
  * const u8 *in, Plaintext input
  * unsigned long plaintext_len, Length of data in bytes for encryption.
@@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
  * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
  *          Valid values are 16 (most likely), 12 or 8.
  */
-asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_enc(void *ctx,
+			struct gcm_context_data *gdata, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
 /* asmlinkage void aesni_gcm_dec()
  * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data.  May be uninitialized.
  * u8 *out, Plaintext output. Decrypt in-place is allowed.
  * const u8 *in, Ciphertext input
  * unsigned long ciphertext_len, Length of data in bytes for decryption.
@@ -137,11 +155,28 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
  * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
  * Valid values are 16 (most likely), 12 or 8.
  */
-asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_dec(void *ctx,
+			struct gcm_context_data *gdata, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
+/* Scatter / Gather routines, with args similar to above */
+asmlinkage void aesni_gcm_init(void *ctx,
+			       struct gcm_context_data *gdata,
+			       u8 *iv,
+			       u8 *hash_subkey, const u8 *aad,
+			       unsigned long aad_len);
+asmlinkage void aesni_gcm_enc_update(void *ctx,
+				     struct gcm_context_data *gdata, u8 *out,
+				     const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update(void *ctx,
+				     struct gcm_context_data *gdata, u8 *out,
+				     const u8 *in,
+				     unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize(void *ctx,
+				   struct gcm_context_data *gdata,
+				   u8 *auth_tag, unsigned long auth_tag_len);
 
 #ifdef CONFIG_AS_AVX
 asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
@@ -167,15 +202,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
 			const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
-static void aesni_gcm_enc_avx(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
         struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
-		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_enc(ctx, data, out, in,
+			plaintext_len, iv, hash_subkey, aad,
+			aad_len, auth_tag, auth_tag_len);
 	} else {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -183,15 +220,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
 	}
 }
 
-static void aesni_gcm_dec_avx(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
         struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_dec(ctx, data, out, in,
+			ciphertext_len, iv, hash_subkey, aad,
+			aad_len, auth_tag, auth_tag_len);
 	} else {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -218,15 +257,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
 			const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
-static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx2(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
        struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_enc(ctx, data, out, in,
+			      plaintext_len, iv, hash_subkey, aad,
+			      aad_len, auth_tag, auth_tag_len);
 	} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -238,15 +279,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
 	}
 }
 
-static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx2(void *ctx,
+	struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
        struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
-				aad, aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_dec(ctx, data, out, in,
+			      ciphertext_len, iv, hash_subkey,
+			      aad, aad_len, auth_tag, auth_tag_len);
 	} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -259,15 +302,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
 }
 #endif
 
-static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
-			const u8 *in, unsigned long plaintext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_enc_tfm)(void *ctx,
+				 struct gcm_context_data *data, u8 *out,
+				 const u8 *in, unsigned long plaintext_len,
+				 u8 *iv, u8 *hash_subkey, const u8 *aad,
+				 unsigned long aad_len, u8 *auth_tag,
+				 unsigned long auth_tag_len);
 
-static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
-			const u8 *in, unsigned long ciphertext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_dec_tfm)(void *ctx,
+				 struct gcm_context_data *data, u8 *out,
+				 const u8 *in, unsigned long ciphertext_len,
+				 u8 *iv, u8 *hash_subkey, const u8 *aad,
+				 unsigned long aad_len, u8 *auth_tag,
+				 unsigned long auth_tag_len);
 
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
@@ -744,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
 	return 0;
 }
 
+static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
+			      unsigned int assoclen, u8 *hash_subkey,
+			      u8 *iv, void *aes_ctx)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
+	struct scatter_walk dst_sg_walk = {};
+	unsigned long left = req->cryptlen;
+	unsigned long len, srclen, dstlen;
+	struct scatter_walk assoc_sg_walk;
+	struct scatter_walk src_sg_walk;
+	struct scatterlist src_start[2];
+	struct scatterlist dst_start[2];
+	struct scatterlist *src_sg;
+	struct scatterlist *dst_sg;
+	u8 *src, *dst, *assoc;
+	u8 *assocmem = NULL;
+	u8 authTag[16];
+
+	if (!enc)
+		left -= auth_tag_len;
+
+	/* Linearize assoc, if not already linear */
+	if (req->src->length >= assoclen && req->src->length &&
+		(!PageHighMem(sg_page(req->src)) ||
+			req->src->offset + req->src->length < PAGE_SIZE)) {
+		scatterwalk_start(&assoc_sg_walk, req->src);
+		assoc = scatterwalk_map(&assoc_sg_walk);
+	} else {
+		/* assoc can be any length, so must be on heap */
+		assocmem = kmalloc(assoclen, GFP_ATOMIC);
+		if (unlikely(!assocmem))
+			return -ENOMEM;
+		assoc = assocmem;
+
+		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
+	}
+
+	src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
+	scatterwalk_start(&src_sg_walk, src_sg);
+	if (req->src != req->dst) {
+		dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen);
+		scatterwalk_start(&dst_sg_walk, dst_sg);
+	}
+
+	kernel_fpu_begin();
+	aesni_gcm_init(aes_ctx, &data, iv,
+		hash_subkey, assoc, assoclen);
+	if (req->src != req->dst) {
+		while (left) {
+			src = scatterwalk_map(&src_sg_walk);
+			dst = scatterwalk_map(&dst_sg_walk);
+			srclen = scatterwalk_clamp(&src_sg_walk, left);
+			dstlen = scatterwalk_clamp(&dst_sg_walk, left);
+			len = min(srclen, dstlen);
+			if (len) {
+				if (enc)
+					aesni_gcm_enc_update(aes_ctx, &data,
+							     dst, src, len);
+				else
+					aesni_gcm_dec_update(aes_ctx, &data,
+							     dst, src, len);
+			}
+			left -= len;
+
+			scatterwalk_unmap(src);
+			scatterwalk_unmap(dst);
+			scatterwalk_advance(&src_sg_walk, len);
+			scatterwalk_advance(&dst_sg_walk, len);
+			scatterwalk_done(&src_sg_walk, 0, left);
+			scatterwalk_done(&dst_sg_walk, 1, left);
+		}
+	} else {
+		while (left) {
+			dst = src = scatterwalk_map(&src_sg_walk);
+			len = scatterwalk_clamp(&src_sg_walk, left);
+			if (len) {
+				if (enc)
+					aesni_gcm_enc_update(aes_ctx, &data,
+							     src, src, len);
+				else
+					aesni_gcm_dec_update(aes_ctx, &data,
+							     src, src, len);
+			}
+			left -= len;
+			scatterwalk_unmap(src);
+			scatterwalk_advance(&src_sg_walk, len);
+			scatterwalk_done(&src_sg_walk, 1, left);
+		}
+	}
+	aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
+	kernel_fpu_end();
+
+	if (!assocmem)
+		scatterwalk_unmap(assoc);
+	else
+		kfree(assocmem);
+
+	if (!enc) {
+		u8 authTagMsg[16];
+
+		/* Copy out original authTag */
+		scatterwalk_map_and_copy(authTagMsg, req->src,
+					 req->assoclen + req->cryptlen -
+					 auth_tag_len,
+					 auth_tag_len, 0);
+
+		/* Compare generated tag with passed in tag. */
+		return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
+			-EBADMSG : 0;
+	}
+
+	/* Copy in the authTag */
+	scatterwalk_map_and_copy(authTag, req->dst,
+				 req->assoclen + req->cryptlen,
+				 auth_tag_len, 1);
+
+	return 0;
+}
+
 static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
@@ -753,7 +921,14 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
 
+	if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+		aesni_gcm_enc_tfm == aesni_gcm_enc ||
+		req->cryptlen < AVX_GEN2_OPTSIZE) {
+		return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
+					  aes_ctx);
+	}
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
 	    req->src->offset + req->src->length <= PAGE_SIZE) &&
@@ -782,7 +957,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 	}
 
 	kernel_fpu_begin();
-	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
+	aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv,
 			  hash_subkey, assoc, assoclen,
 			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
@@ -817,8 +992,15 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
 	int retval = 0;
 
+	if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+		aesni_gcm_enc_tfm == aesni_gcm_enc ||
+		req->cryptlen < AVX_GEN2_OPTSIZE) {
+		return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
+					  aes_ctx);
+	}
 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
 
 	if (sg_is_last(req->src) &&
@@ -849,7 +1031,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 
 
 	kernel_fpu_begin();
-	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
+	aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv,
 			  hash_subkey, assoc, assoclen,
 			  authTag, auth_tag_len);
 	kernel_fpu_end();

+ 113 - 117
arch/x86/crypto/blowfish_glue.c

@@ -25,13 +25,13 @@
  *
  */
 
-#include <asm/processor.h>
+#include <crypto/algapi.h>
 #include <crypto/blowfish.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
 
 /* regular block cipher functions */
 asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
@@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
+				    const u8 *key, unsigned int keylen)
+{
+	return blowfish_setkey(&tfm->base, key, keylen);
+}
+
+static int ecb_crypt(struct skcipher_request *req,
 		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
 		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 		/* Process four block batch */
 		if (nbytes >= bsize * 4) {
@@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
+	return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
+	return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way);
 }
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_encrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -245,24 +244,25 @@ done:
 	return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u8 keystream[BF_BLOCK_SIZE];
@@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk)
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -332,29 +330,30 @@ done:
 	return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __ctr_crypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	if (walk.nbytes) {
-		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+	if (nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
 }
 
-static struct crypto_alg bf_algs[4] = { {
+static struct crypto_alg bf_cipher_alg = {
 	.cra_name		= "blowfish",
 	.cra_driver_name	= "blowfish-asm",
 	.cra_priority		= 200,
@@ -372,66 +371,50 @@ static struct crypto_alg bf_algs[4] = { {
 			.cia_decrypt		= blowfish_decrypt,
 		}
 	}
-}, {
-	.cra_name		= "ecb(blowfish)",
-	.cra_driver_name	= "ecb-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
+};
+
+static struct skcipher_alg bf_skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(blowfish)",
+		.base.cra_driver_name	= "ecb-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= BF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(blowfish)",
+		.base.cra_driver_name	= "cbc-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= BF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.ivsize			= BF_BLOCK_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(blowfish)",
+		.base.cra_driver_name	= "ctr-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.ivsize			= BF_BLOCK_SIZE,
+		.chunksize		= BF_BLOCK_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
 	},
-}, {
-	.cra_name		= "cbc(blowfish)",
-	.cra_driver_name	= "cbc-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(blowfish)",
-	.cra_driver_name	= "ctr-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"blowfish-x86_64: performance on this CPU "
@@ -464,12 +449,23 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
+	err = crypto_register_alg(&bf_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(bf_skcipher_algs,
+					ARRAY_SIZE(bf_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&bf_cipher_alg);
+
+	return err;
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
+	crypto_unregister_alg(&bf_cipher_alg);
+	crypto_unregister_skciphers(bf_skcipher_algs,
+				    ARRAY_SIZE(bf_skcipher_algs));
 }
 
 module_init(init);
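
Note on the pattern above: every converted handler in this file is built around the same skcipher_walk loop. As a rough, illustrative sketch (not code from this patch), a generic handler on the new API looks like the following; example_skcipher_crypt() and process_blocks() are hypothetical names standing in for the cipher-specific work such as __cbc_decrypt() above:

	#include <crypto/internal/skcipher.h>

	static int example_skcipher_crypt(struct skcipher_request *req)
	{
		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
		void *ctx = crypto_skcipher_ctx(tfm);	/* per-tfm key schedule */
		struct skcipher_walk walk;
		unsigned int nbytes;
		int err;

		/* Map the request data; the third argument is the "atomic" flag. */
		err = skcipher_walk_virt(&walk, req, false);

		while ((nbytes = walk.nbytes)) {
			/*
			 * Hypothetical helper: crypt walk.src into walk.dst and
			 * return the number of bytes it could not consume.
			 */
			nbytes = process_blocks(ctx, &walk);
			/* Tell the walk how much of this chunk is left over. */
			err = skcipher_walk_done(&walk, nbytes);
		}

		return err;
	}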

arch/x86/crypto/camellia_aesni_avx2_glue.c (99 additions, 392 deletions)

@@ -10,18 +10,15 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
@@ -150,413 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
 {
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+				 &tfm->base.crt_flags);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
-	return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
-			      CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
-			      nbytes);
-}
-
-static inline void camellia_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
-}
-
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			   unsigned int key_len)
-{
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
-				 &tfm->crt_flags);
-}
-
-struct crypt_priv {
-	struct camellia_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-	}
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-	}
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&camellia_enc_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&camellia_dec_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static struct crypto_alg cmll_algs[10] = { {
-	.cra_name		= "__ecb-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-ecb-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-cbc-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-ctr-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-lrw-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-xts-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg camellia_algs[] = {
+	{
+		.base.cra_name		= "__ecb(camellia)",
+		.base.cra_driver_name	= "__ecb-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(camellia)",
+		.base.cra_driver_name	= "__cbc-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(camellia)",
+		.base.cra_driver_name	= "__ctr-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(camellia)",
+		.base.cra_driver_name	= "__xts-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= xts_camellia_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 static int __init camellia_aesni_init(void)
 {
@@ -576,12 +280,15 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	return simd_register_skciphers_compat(camellia_algs,
+					      ARRAY_SIZE(camellia_algs),
+					      camellia_simd_algs);
 }
 
 static void __exit camellia_aesni_fini(void)
 {
-	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+				  camellia_simd_algs);
 }
 
 module_init(camellia_aesni_init);
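
For context on the registration calls above (a sketch of intent, not new code): the skcipher_alg entries are marked CRYPTO_ALG_INTERNAL with "__"-prefixed names, and the public asynchronous algorithms are provided by the SIMD wrapper helper rather than by hand-written ablkcipher glue. The init/exit pair above reduces to this shape, using the camellia_algs[] and camellia_simd_algs[] arrays defined earlier in the file; the comments describe the helper's expected behaviour:

	static int __init example_init(void)
	{
		/*
		 * Registers the internal algorithms plus one SIMD wrapper per
		 * entry; the wrappers are expected to expose the names without
		 * the "__" prefix (e.g. "xts(camellia)") and to fall back to a
		 * cryptd worker when SIMD registers cannot be used directly.
		 */
		return simd_register_skciphers_compat(camellia_algs,
						      ARRAY_SIZE(camellia_algs),
						      camellia_simd_algs);
	}

	static void __exit example_exit(void)
	{
		/* Unregisters both the wrappers and the internal algorithms. */
		simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
					  camellia_simd_algs);
	}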

arch/x86/crypto/camellia_aesni_avx_glue.c (118 additions, 377 deletions)

@@ -10,18 +10,15 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 
@@ -154,401 +151,142 @@ static const struct common_glue_ctx camellia_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
 {
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+				 &tfm->base.crt_flags);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
-			      CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
-			      nbytes);
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 
-static inline void camellia_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			   unsigned int key_len)
+int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen)
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
-				 &tfm->crt_flags);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+				flags);
 }
+EXPORT_SYMBOL_GPL(xts_camellia_setkey);
 
-struct crypt_priv {
-	struct camellia_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&camellia_enc_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&camellia_dec_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg cmll_algs[10] = { {
-	.cra_name		= "__ecb-camellia-aesni",
-	.cra_driver_name	= "__driver-ecb-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-camellia-aesni",
-	.cra_driver_name	= "__driver-cbc-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-camellia-aesni",
-	.cra_driver_name	= "__driver-ctr-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-camellia-aesni",
-	.cra_driver_name	= "__driver-lrw-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-camellia-aesni",
-	.cra_driver_name	= "__driver-xts-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg camellia_algs[] = {
+	{
+		.base.cra_name		= "__ecb(camellia)",
+		.base.cra_driver_name	= "__ecb-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(camellia)",
+		.base.cra_driver_name	= "__cbc-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(camellia)",
+		.base.cra_driver_name	= "__ctr-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(camellia)",
+		.base.cra_driver_name	= "__xts-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= xts_camellia_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 static int __init camellia_aesni_init(void)
 {
@@ -567,12 +305,15 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	return simd_register_skciphers_compat(camellia_algs,
+					      ARRAY_SIZE(camellia_algs),
+					      camellia_simd_algs);
 }
 
 static void __exit camellia_aesni_fini(void)
 {
-	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+				  camellia_simd_algs);
 }
 
 module_init(camellia_aesni_init);
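
From a user's point of view nothing above changes how these algorithms are consumed: a kernel caller still requests them by the generic name and gets the highest-priority implementation. A minimal usage sketch with the standard skcipher request API is shown below; it is not part of this patch, example_encrypt_buf() is a hypothetical function, and the buffer is assumed to be heap-allocated (not on the stack) so it can be mapped through a scatterlist:

	#include <crypto/skcipher.h>
	#include <linux/scatterlist.h>
	#include <linux/err.h>

	static int example_encrypt_buf(const u8 *key, unsigned int keylen,
				       u8 *buf, unsigned int len, u8 *iv)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		DECLARE_CRYPTO_WAIT(wait);
		int err;

		/* Resolves to the best "xts(camellia)" provider, e.g. the
		 * AES-NI/AVX glue above when the CPU supports it. */
		tfm = crypto_alloc_skcipher("xts(camellia)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err)
			goto out_free_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_free_tfm;
		}

		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
					      crypto_req_done, &wait);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		/* Wait for completion even if the implementation is async. */
		err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

		skcipher_request_free(req);
	out_free_tfm:
		crypto_free_skcipher(tfm);
		return err;
	}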

arch/x86/crypto/camellia_glue.c (79 additions, 277 deletions)

@@ -23,15 +23,12 @@
  *
  */
 
-#include <asm/processor.h>
 #include <asm/unaligned.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -1272,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
 }
 EXPORT_SYMBOL_GPL(__camellia_setkey);
 
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int key_len)
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+	return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len,
 				 &tfm->crt_flags);
 }
 
+static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				    unsigned int key_len)
+{
+	return camellia_setkey(&tfm->base, key, key_len);
+}
+
 void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 {
 	u128 iv = *src;
@@ -1373,188 +1376,33 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct camellia_ctx *ctx = priv;
-	int i;
-
-	while (nbytes >= 2 * bsize) {
-		camellia_enc_blk_2way(ctx, srcdst, srcdst);
-		srcdst += bsize * 2;
-		nbytes -= bsize * 2;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct camellia_ctx *ctx = priv;
-	int i;
-
-	while (nbytes >= 2 * bsize) {
-		camellia_dec_blk_2way(ctx, srcdst, srcdst);
-		srcdst += bsize * 2;
-		nbytes -= bsize * 2;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __camellia_setkey(&ctx->camellia_ctx, key,
-				keylen - CAMELLIA_BLOCK_SIZE,
-				&tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table,
-			      key + keylen - CAMELLIA_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[2 * 4];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->camellia_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[2 * 4];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->camellia_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 
-void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
-EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
 
-int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned int keylen)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
-EXPORT_SYMBOL_GPL(xts_camellia_setkey);
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[2 * 4];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[2 * 4];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 
-static struct crypto_alg camellia_algs[6] = { {
+static struct crypto_alg camellia_cipher_alg = {
 	.cra_name		= "camellia",
 	.cra_driver_name	= "camellia-asm",
 	.cra_priority		= 200,
@@ -1572,109 +1420,50 @@ static struct crypto_alg camellia_algs[6] = { {
 			.cia_decrypt	 = camellia_decrypt
 		}
 	}
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-						CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-						CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg camellia_skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(camellia)",
+		.base.cra_driver_name	= "ecb-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(camellia)",
+		.base.cra_driver_name	= "cbc-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(camellia)",
+		.base.cra_driver_name	= "ctr-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -1700,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"camellia-x86_64: performance on this CPU "
@@ -1708,12 +1499,23 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+	err = crypto_register_alg(&camellia_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(camellia_skcipher_algs,
+					ARRAY_SIZE(camellia_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&camellia_cipher_alg);
+
+	return err;
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+	crypto_unregister_alg(&camellia_cipher_alg);
+	crypto_unregister_skciphers(camellia_skcipher_algs,
+				    ARRAY_SIZE(camellia_skcipher_algs));
 }
 
 module_init(init);
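
The split into camellia_cipher_alg and camellia_skcipher_algs[] above means the module now registers two kinds of objects, so a failure in the second registration has to unwind the first. The init path above follows this shape (restated here as a sketch with explanatory comments, using the symbols defined earlier in the file):

	static int __init example_init(void)
	{
		int err;

		/* Register the bare single-block "camellia" cipher first. */
		err = crypto_register_alg(&camellia_cipher_alg);
		if (err)
			return err;

		/* Then register the ecb/cbc/ctr skcipher mode implementations. */
		err = crypto_register_skciphers(camellia_skcipher_algs,
						ARRAY_SIZE(camellia_skcipher_algs));
		if (err)
			/* Unwind: do not leave the bare cipher registered alone. */
			crypto_unregister_alg(&camellia_cipher_alg);

		return err;
	}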

arch/x86/crypto/cast5_avx_glue.c (127 additions, 225 deletions)

@@ -21,18 +21,14 @@
  *
  */
 
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/cast5.h>
-#include <crypto/cryptd.h>
-#include <crypto/ctr.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/glue_helper.h>
+#include <crypto/internal/simd.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 #define CAST5_PARALLEL_BLOCKS 16
 
@@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
 asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
 				__be64 *iv);
 
-static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	return cast5_setkey(&tfm->base, key, keylen);
+}
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
+				   unsigned int nbytes)
 {
 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+			      walk, fpu_enabled, nbytes);
 }
 
 static inline void cast5_fpu_end(bool fpu_enabled)
@@ -56,29 +59,28 @@ static inline void cast5_fpu_end(bool fpu_enabled)
 	return glue_fpu_end(fpu_enabled);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     bool enc)
+static int ecb_crypt(struct skcipher_request *req, bool enc)
 {
 	bool fpu_enabled = false;
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes;
 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
 	int err;
 
-	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
-
-	err = blkcipher_walk_virt(desc, walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
 
 		/* Process multi-block batch */
 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
 			do {
 				fn(ctx, wdst, wsrc);
 
@@ -103,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, true);
+	return ecb_crypt(req, true);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, false);
+	return ecb_crypt(req, false);
 }
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u64 *src = (u64 *)walk->src.virt.addr;
-	u64 *dst = (u64 *)walk->dst.virt.addr;
-	u64 *iv = (u64 *)walk->iv;
-
-	do {
-		*dst = *src ^ *iv;
-		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	*(u64 *)walk->iv = *iv;
-	return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u64 *src = (u64 *)walk.src.virt.addr;
+		u64 *dst = (u64 *)walk.dst.virt.addr;
+		u64 *iv = (u64 *)walk.iv;
+
+		do {
+			*dst = *src ^ *iv;
+			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+			iv = dst;
+			src++;
+			dst++;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+		*(u64 *)walk.iv = *iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -224,31 +208,29 @@ done:
 	return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	u8 *ctrblk = walk->iv;
 	u8 keystream[CAST5_BLOCK_SIZE];
 	u8 *src = walk->src.virt.addr;
@@ -261,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct skcipher_walk *walk,
+				struct cast5_ctx *ctx)
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -307,162 +288,80 @@ done:
 	return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		nbytes = __ctr_crypt(&walk, ctx);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 
 	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		ctr_crypt_final(&walk, ctx);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
 }
 
+static struct skcipher_alg cast5_algs[] = {
+	{
+		.base.cra_name		= "__ecb(cast5)",
+		.base.cra_driver_name	= "__ecb-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(cast5)",
+		.base.cra_driver_name	= "__cbc-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.ivsize			= CAST5_BLOCK_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(cast5)",
+		.base.cra_driver_name	= "__ctr-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.ivsize			= CAST5_BLOCK_SIZE,
+		.chunksize		= CAST5_BLOCK_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
-static struct crypto_alg cast5_algs[6] = { {
-	.cra_name		= "__ecb-cast5-avx",
-	.cra_driver_name	= "__driver-ecb-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-cast5-avx",
-	.cra_driver_name	= "__driver-cbc-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-cast5-avx",
-	.cra_driver_name	= "__driver-ctr-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(cast5)",
-	.cra_driver_name	= "ecb-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(cast5)",
-	.cra_driver_name	= "cbc-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(cast5)",
-	.cra_driver_name	= "ctr-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-} };
+static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
 
 static int __init cast5_init(void)
 {
@@ -474,12 +373,15 @@ static int __init cast5_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+	return simd_register_skciphers_compat(cast5_algs,
+					      ARRAY_SIZE(cast5_algs),
+					      cast5_simd_algs);
 }
 
 static void __exit cast5_exit(void)
 {
-	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
+				  cast5_simd_algs);
 }
 
 module_init(cast5_init);
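
The cast5 conversion above replaces the blkcipher descriptor/scatterlist entry points with request-based handlers built around the skcipher walk API, and registers the internal algorithms through the simd wrapper (simd_register_skciphers_compat()) instead of the old ablk_helper algs. Below is a minimal sketch, not taken from the patch, of the walk loop those handlers follow; toy_ctx and toy_block_crypt are hypothetical stand-ins for the real CAST5 context and single-block function.

/*
 * Hedged sketch of the skcipher walk pattern used by the converted
 * ECB/CBC handlers above.  toy_ctx / toy_block_crypt are hypothetical.
 */
#include <linux/string.h>
#include <linux/types.h>
#include <crypto/internal/skcipher.h>

#define TOY_BLOCK_SIZE	8

struct toy_ctx {
	u32 rkey[4];				/* placeholder key schedule */
};

static void toy_block_crypt(struct toy_ctx *ctx, u8 *dst, const u8 *src)
{
	memcpy(dst, src, TOY_BLOCK_SIZE);	/* placeholder transform */
}

static int toy_ecb_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct toy_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	/* false: the walk may sleep between steps */
	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		/* each step maps at least one full block for ECB */
		do {
			toy_block_crypt(ctx, dst, src);
			src += TOY_BLOCK_SIZE;
			dst += TOY_BLOCK_SIZE;
			nbytes -= TOY_BLOCK_SIZE;
		} while (nbytes >= TOY_BLOCK_SIZE);

		/* report the unprocessed remainder back to the walk */
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}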

arch/x86/crypto/cast6_avx_glue.c  (+100, -389)

@@ -24,19 +24,13 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/cast6.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
+#include <crypto/internal/simd.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/glue_helper.h>
 
 #define CAST6_PARALLEL_BLOCKS 8
@@ -56,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
 asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
 				   const u8 *src, le128 *iv);
 
+static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
+				 const u8 *key, unsigned int keylen)
+{
+	return cast6_setkey(&tfm->base, key, keylen);
+}
+
 static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
@@ -157,164 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&cast6_enc, req);
 }
 
-static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+	return glue_ecb_req_128bit(&cast6_dec, req);
 }
 
-static inline void cast6_fpu_end(bool fpu_enabled)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
+					   req);
 }
 
-struct crypt_priv {
-	struct cast6_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAST6_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
-		cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+	return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = CAST6_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
-		cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct cast6_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct cast6_ctx cast6_ctx;
-};
-
-static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
-			     &tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAST6_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->cast6_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	cast6_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAST6_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->cast6_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	cast6_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
+	return glue_ctr_req_128bit(&cast6_ctr, req);
 }
 
 struct cast6_xts_ctx {
@@ -322,14 +188,14 @@ struct cast6_xts_ctx {
 	struct cast6_ctx crypt_ctx;
 };
 
-static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
+static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			    unsigned int keylen)
 {
-	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
 	int err;
 
-	err = xts_check_key(tfm, key, keylen);
+	err = xts_verify_key(tfm, key, keylen);
 	if (err)
 		return err;
 
@@ -343,245 +209,87 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
 			      flags);
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__cast6_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&cast6_enc_xts, req,
+				   XTS_TWEAK_CAST(__cast6_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__cast6_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&cast6_dec_xts, req,
+				   XTS_TWEAK_CAST(__cast6_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static struct crypto_alg cast6_algs[10] = { {
-	.cra_name		= "__ecb-cast6-avx",
-	.cra_driver_name	= "__driver-ecb-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-cast6-avx",
-	.cra_driver_name	= "__driver-cbc-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-cast6-avx",
-	.cra_driver_name	= "__driver-ctr-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-cast6-avx",
-	.cra_driver_name	= "__driver-lrw-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= lrw_cast6_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-cast6-avx",
-	.cra_driver_name	= "__driver-xts-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= xts_cast6_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(cast6)",
-	.cra_driver_name	= "ecb-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(cast6)",
-	.cra_driver_name	= "cbc-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(cast6)",
-	.cra_driver_name	= "ctr-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(cast6)",
-	.cra_driver_name	= "lrw-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(cast6)",
-	.cra_driver_name	= "xts-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg cast6_algs[] = {
+	{
+		.base.cra_name		= "__ecb(cast6)",
+		.base.cra_driver_name	= "__ecb-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(cast6)",
+		.base.cra_driver_name	= "__cbc-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(cast6)",
+		.base.cra_driver_name	= "__ctr-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.chunksize		= CAST6_BLOCK_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(cast6)",
+		.base.cra_driver_name	= "__xts-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAST6_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.setkey			= xts_cast6_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
 
 static int __init cast6_init(void)
 {
@@ -593,12 +301,15 @@ static int __init cast6_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+	return simd_register_skciphers_compat(cast6_algs,
+					      ARRAY_SIZE(cast6_algs),
+					      cast6_simd_algs);
 }
 
 static void __exit cast6_exit(void)
 {
-	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+	simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs),
+				  cast6_simd_algs);
 }
 
 module_init(cast6_init);
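
The cast6 conversion follows the same shape and also moves XTS key handling onto the skcipher interface: xts_check_key() on a crypto_tfm becomes xts_verify_key() on a crypto_skcipher, after which the key is split into a data half and a tweak half. Below is a minimal sketch, not taken from the patch, of that setkey pattern; toy_xts_ctx and toy_expand_key are hypothetical stand-ins for the real context and key schedule.

/*
 * Hedged sketch of the skcipher-based XTS setkey pattern used by
 * xts_cast6_setkey() above.  toy_xts_ctx / toy_expand_key are hypothetical.
 */
#include <linux/types.h>
#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>

struct toy_key {
	u32 rk[8];			/* placeholder round keys */
};

struct toy_xts_ctx {
	struct toy_key tweak_ctx;	/* keys the tweak generator */
	struct toy_key crypt_ctx;	/* keys the data transform */
};

static int toy_expand_key(struct toy_key *k, const u8 *key, unsigned int len)
{
	return 0;			/* placeholder key schedule */
}

static int toy_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			  unsigned int keylen)
{
	struct toy_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	/* rejects odd lengths and (in FIPS mode) identical key halves */
	err = xts_verify_key(tfm, key, keylen);
	if (err)
		return err;

	keylen /= 2;

	/* first half keys the data cipher, second half the tweak cipher */
	err = toy_expand_key(&ctx->crypt_ctx, key, keylen);
	if (err)
		return err;

	return toy_expand_key(&ctx->tweak_ctx, key + keylen, keylen);
}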

arch/x86/crypto/des3_ede_glue.c  (+119, -119)

@@ -20,13 +20,13 @@
  *
  */
 
-#include <asm/processor.h>
+#include <crypto/algapi.h>
 #include <crypto/des.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
 
 struct des3_ede_x86_ctx {
 	u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
@@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     const u32 *expkey)
+static int ecb_crypt(struct skcipher_request *req, const u32 *expkey)
 {
-	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+	const unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 		/* Process four block batch */
 		if (nbytes >= bsize * 3) {
@@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, ctx->enc_expkey);
+	return ecb_crypt(req, ctx->enc_expkey);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, ctx->dec_expkey);
+	return ecb_crypt(req, ctx->dec_expkey);
 }
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_encrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -250,25 +245,26 @@ done:
 	return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
 static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
-			    struct blkcipher_walk *walk)
+			    struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u8 keystream[DES3_EDE_BLOCK_SIZE];
@@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx,
+				struct skcipher_walk *walk)
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	__be64 *src = (__be64 *)walk->src.virt.addr;
@@ -333,23 +328,24 @@ done:
 	return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __ctr_crypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
-	if (walk.nbytes) {
-		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+	if (nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
@@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 }
 
-static struct crypto_alg des3_ede_algs[4] = { {
+static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm,
+					const u8 *key,
+					unsigned int keylen)
+{
+	return des3_ede_x86_setkey(&tfm->base, key, keylen);
+}
+
+static struct crypto_alg des3_ede_cipher = {
 	.cra_name		= "des3_ede",
 	.cra_driver_name	= "des3_ede-asm",
 	.cra_priority		= 200,
@@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { {
 			.cia_decrypt		= des3_ede_x86_decrypt,
 		}
 	}
-}, {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "ecb-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "cbc-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(des3_ede)",
-	.cra_driver_name	= "ctr-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg des3_ede_skciphers[] = {
+	{
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "ecb-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "cbc-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(des3_ede)",
+		.base.cra_driver_name	= "ctr-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.chunksize		= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 static int __init des3_ede_x86_init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+	err = crypto_register_alg(&des3_ede_cipher);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(des3_ede_skciphers,
+					ARRAY_SIZE(des3_ede_skciphers));
+	if (err)
+		crypto_unregister_alg(&des3_ede_cipher);
+
+	return err;
 }
 
 static void __exit des3_ede_x86_fini(void)
 {
-	crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+	crypto_unregister_alg(&des3_ede_cipher);
+	crypto_unregister_skciphers(des3_ede_skciphers,
+				    ARRAY_SIZE(des3_ede_skciphers));
 }
 
 module_init(des3_ede_x86_init);
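
Unlike the AVX glue files, des3_ede keeps one plain single-block cipher alg and converts only the ecb/cbc/ctr modes to skcipher_alg, so module init now performs two registrations and must unwind the first if the second fails. Below is a minimal sketch, not taken from the patch, of that registration shape; toy_cipher and toy_skciphers are hypothetical and their definitions are elided.

/*
 * Hedged sketch of the two-stage registration des3_ede_x86_init()
 * above now performs.  toy_cipher / toy_skciphers are hypothetical and
 * left unfilled here; a real driver populates them before registering.
 */
#include <linux/module.h>
#include <linux/crypto.h>
#include <crypto/internal/skcipher.h>

static struct crypto_alg toy_cipher;		/* single-block "cipher" alg */
static struct skcipher_alg toy_skciphers[3];	/* ecb/cbc/ctr wrappers */

static int __init toy_mod_init(void)
{
	int err;

	err = crypto_register_alg(&toy_cipher);
	if (err)
		return err;

	err = crypto_register_skciphers(toy_skciphers,
					ARRAY_SIZE(toy_skciphers));
	if (err)
		crypto_unregister_alg(&toy_cipher);	/* unwind on failure */

	return err;
}

static void __exit toy_mod_exit(void)
{
	crypto_unregister_skciphers(toy_skciphers, ARRAY_SIZE(toy_skciphers));
	crypto_unregister_alg(&toy_cipher);
}

module_init(toy_mod_init);
module_exit(toy_mod_exit);
MODULE_LICENSE("GPL");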

arch/x86/crypto/glue_helper.c  (+125, -266)

@@ -29,313 +29,212 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 
-static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-				   struct blkcipher_desc *desc,
-				   struct blkcipher_walk *walk)
+int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req)
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes, i, func_bytes;
+	struct skcipher_walk walk;
 	bool fpu_enabled = false;
+	unsigned int nbytes;
 	int err;
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes;
+		unsigned int i;
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-
+					     &walk, fpu_enabled, nbytes);
 		for (i = 0; i < gctx->num_funcs; i++) {
 			func_bytes = bsize * gctx->funcs[i].num_blocks;
 
-			/* Process multi-block batch */
-			if (nbytes >= func_bytes) {
-				do {
-					gctx->funcs[i].fn_u.ecb(ctx, wdst,
-								wsrc);
+			if (nbytes < func_bytes)
+				continue;
 
-					wsrc += func_bytes;
-					wdst += func_bytes;
-					nbytes -= func_bytes;
-				} while (nbytes >= func_bytes);
+			/* Process multi-block batch */
+			do {
+				gctx->funcs[i].fn_u.ecb(ctx, dst, src);
+				src += func_bytes;
+				dst += func_bytes;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
 
-				if (nbytes < bsize)
-					goto done;
-			}
+			if (nbytes < bsize)
+				break;
 		}
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	glue_fpu_end(fpu_enabled);
 	return err;
 }
+EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
 
-int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
+int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+				struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
-}
-EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
-
-static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-					      struct blkcipher_desc *desc,
-					      struct blkcipher_walk *walk)
-{
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		fn(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	*(u128 *)walk->iv = *iv;
-	return nbytes;
-}
-
-int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-			    struct blkcipher_desc *desc,
-			    struct scatterlist *dst,
-			    struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		const u128 *src = (u128 *)walk.src.virt.addr;
+		u128 *dst = (u128 *)walk.dst.virt.addr;
+		u128 *iv = (u128 *)walk.iv;
+
+		do {
+			u128_xor(dst, src, iv);
+			fn(ctx, (u8 *)dst, (u8 *)dst);
+			iv = dst;
+			src++;
+			dst++;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+		*(u128 *)walk.iv = *iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
-
 	return err;
 }
-EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit);
 
-static unsigned int
-__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc,
-			  struct blkcipher_walk *walk)
+int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+				struct skcipher_request *req)
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 last_iv;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
 
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
+	while ((nbytes = walk.nbytes)) {
+		const u128 *src = walk.src.virt.addr;
+		u128 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes, num_blocks;
+		unsigned int i;
+		u128 last_iv;
 
-	last_iv = *src;
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled, nbytes);
+		/* Start of the last block. */
+		src += nbytes / bsize - 1;
+		dst += nbytes / bsize - 1;
 
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
+		last_iv = *src;
 
-		/* Process multi-block batch */
-		if (nbytes >= func_bytes) {
+		for (i = 0; i < gctx->num_funcs; i++) {
+			num_blocks = gctx->funcs[i].num_blocks;
+			func_bytes = bsize * num_blocks;
+
+			if (nbytes < func_bytes)
+				continue;
+
+			/* Process multi-block batch */
 			do {
-				nbytes -= func_bytes - bsize;
 				src -= num_blocks - 1;
 				dst -= num_blocks - 1;
 
 				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
 
-				nbytes -= bsize;
+				nbytes -= func_bytes;
 				if (nbytes < bsize)
 					goto done;
 
-				u128_xor(dst, dst, src - 1);
-				src -= 1;
-				dst -= 1;
+				u128_xor(dst, dst, --src);
+				dst--;
 			} while (nbytes >= func_bytes);
 		}
-	}
-
 done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
-
-	return nbytes;
-}
-
-int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-			    struct blkcipher_desc *desc,
-			    struct scatterlist *dst,
-			    struct scatterlist *src, unsigned int nbytes)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u128_xor(dst, dst, (u128 *)walk.iv);
+		*(u128 *)walk.iv = last_iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	glue_fpu_end(fpu_enabled);
 	return err;
 }
-EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
 
-static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
-					struct blkcipher_desc *desc,
-					struct blkcipher_walk *walk)
+int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req)
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 *src = (u8 *)walk->src.virt.addr;
-	u8 *dst = (u8 *)walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-	le128 ctrblk;
-	u128 tmp;
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	const unsigned int bsize = 128 / 8;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
 
-	be128_to_le128(&ctrblk, (be128 *)walk->iv);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	memcpy(&tmp, src, nbytes);
-	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
-	memcpy(dst, &tmp, nbytes);
+	while ((nbytes = walk.nbytes) >= bsize) {
+		const u128 *src = walk.src.virt.addr;
+		u128 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes, num_blocks;
+		unsigned int i;
+		le128 ctrblk;
 
-	le128_to_be128((be128 *)walk->iv, &ctrblk);
-}
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled, nbytes);
 
-static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-					    struct blkcipher_desc *desc,
-					    struct blkcipher_walk *walk)
-{
-	const unsigned int bsize = 128 / 8;
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	le128 ctrblk;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
+		be128_to_le128(&ctrblk, (be128 *)walk.iv);
 
-	be128_to_le128(&ctrblk, (be128 *)walk->iv);
+		for (i = 0; i < gctx->num_funcs; i++) {
+			num_blocks = gctx->funcs[i].num_blocks;
+			func_bytes = bsize * num_blocks;
 
-	/* Process multi-block batch */
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
+			if (nbytes < func_bytes)
+				continue;
 
-		if (nbytes >= func_bytes) {
+			/* Process multi-block batch */
 			do {
 				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
-
 				src += num_blocks;
 				dst += num_blocks;
 				nbytes -= func_bytes;
 			} while (nbytes >= func_bytes);
 
 			if (nbytes < bsize)
-				goto done;
+				break;
 		}
-	}
-
-done:
-	le128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
-}
-
-int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, bsize);
 
-	while ((nbytes = walk.nbytes) >= bsize) {
-		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		le128_to_be128((be128 *)walk.iv, &ctrblk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	glue_fpu_end(fpu_enabled);
 
-	if (walk.nbytes) {
-		glue_ctr_crypt_final_128bit(
-			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
-
-static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-					    void *ctx,
-					    struct blkcipher_desc *desc,
-					    struct blkcipher_walk *walk)
-{
-	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
-
-	/* Process multi-block batch */
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
-
-		if (nbytes >= func_bytes) {
-			do {
-				gctx->funcs[i].fn_u.xts(ctx, dst, src,
-							(le128 *)walk->iv);
+	if (nbytes) {
+		le128 ctrblk;
+		u128 tmp;
 
-				src += num_blocks;
-				dst += num_blocks;
-				nbytes -= func_bytes;
-			} while (nbytes >= func_bytes);
+		be128_to_le128(&ctrblk, (be128 *)walk.iv);
+		memcpy(&tmp, walk.src.virt.addr, nbytes);
+		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
+							  &ctrblk);
+		memcpy(walk.dst.virt.addr, &tmp, nbytes);
+		le128_to_be128((be128 *)walk.iv, &ctrblk);
 
-			if (nbytes < bsize)
-				goto done;
-		}
+		err = skcipher_walk_done(&walk, 0);
 	}
 
-done:
-	return nbytes;
+	return err;
 }
+EXPORT_SYMBOL_GPL(glue_ctr_req_128bit);
 
 static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 					  void *ctx,
@@ -372,46 +271,6 @@ done:
 	return nbytes;
 }
 
-/* for implementations implementing faster XTS IV generator */
-int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes,
-			  void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
-			  void *tweak_ctx, void *crypt_ctx)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	/* set minimum length to bsize, for tweak_fn */
-	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-				     desc, fpu_enabled,
-				     nbytes < bsize ? bsize : nbytes);
-
-	/* calculate first value of T */
-	tweak_fn(tweak_ctx, walk.iv, walk.iv);
-
-	while (nbytes) {
-		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-	}
-
-	glue_fpu_end(fpu_enabled);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
-
 int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			struct skcipher_request *req,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
@@ -429,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 		return err;
 
 	/* set minimum length to bsize, for tweak_fn */
-	fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					    &walk, fpu_enabled,
-					    nbytes < bsize ? bsize : nbytes);
+	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+				     &walk, fpu_enabled,
+				     nbytes < bsize ? bsize : nbytes);
 
 	/* calculate first value of T */
 	tweak_fn(tweak_ctx, walk.iv, walk.iv);
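
With blkcipher_walk_virt_block() gone, the skcipher walk hands the CTR code a final chunk shorter than one block, which the converted code finishes in-line much as ctr_crypt_final() does in the cast5 and des3_ede glue and as the new tail of glue_ctr_req_128bit() does with its last fn_u.ctr helper: encrypt one counter block into a keystream buffer, XOR only the remaining bytes, and bump the IV. Below is a minimal sketch of that tail handling, not taken from the patch; toy_encrypt_block is a hypothetical single-block encrypt.

/*
 * Hedged sketch of finishing a CTR request whose tail is shorter than
 * one block.  toy_encrypt_block is hypothetical.
 */
#include <linux/string.h>
#include <linux/types.h>
#include <crypto/algapi.h>		/* crypto_inc(), crypto_xor() */
#include <crypto/internal/skcipher.h>

#define TOY_BLOCK_SIZE	16

static void toy_encrypt_block(void *ctx, u8 *dst, const u8 *src)
{
	memcpy(dst, src, TOY_BLOCK_SIZE);	/* placeholder transform */
}

static void toy_ctr_final(void *ctx, struct skcipher_walk *walk)
{
	u8 keystream[TOY_BLOCK_SIZE];
	const u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;	/* < TOY_BLOCK_SIZE here */

	/* keystream = E_k(counter) */
	toy_encrypt_block(ctx, keystream, walk->iv);

	/* XOR only the bytes that remain */
	if (dst != src)
		memcpy(dst, src, nbytes);
	crypto_xor(dst, keystream, nbytes);

	/* advance the counter for any chained request */
	crypto_inc(walk->iv, TOY_BLOCK_SIZE);
}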

arch/x86/crypto/serpent_avx2_glue.c  (+97, -381)

@@ -14,15 +14,12 @@
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
+#include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
 
 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
 
@@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
 asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
 				      const u8 *src, le128 *iv);
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 3,
 	.fpu_blocks_limit = 8,
@@ -136,403 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				       dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	/* since reusing AVX functions, starts using FPU at 8 parallel blocks */
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
-}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
+	return glue_xts_req_128bit(&serpent_enc_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&serpent_dec_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg srp_algs[10] = { {
-	.cra_name		= "__ecb-serpent-avx2",
-	.cra_driver_name	= "__driver-ecb-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[0].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-avx2",
-	.cra_driver_name	= "__driver-cbc-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[1].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-avx2",
-	.cra_driver_name	= "__driver-ctr-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[2].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-avx2",
-	.cra_driver_name	= "__driver-lrw-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[3].cra_list),
-	.cra_exit		= lrw_serpent_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-avx2",
-	.cra_driver_name	= "__driver-xts-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[4].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[5].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[6].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[7].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[8].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[9].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(serpent)",
+		.base.cra_driver_name	= "__xts-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= xts_serpent_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init init(void)
 {
@@ -548,12 +261,15 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 
 module_init(init);
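Note: after this conversion the module only registers internal ("__"-prefixed) skcipher algorithms; simd_register_skciphers_compat() creates the user-visible async wrappers, which call the AVX2 code directly when the FPU is usable and defer to cryptd otherwise. A minimal sketch of reaching the accelerated code from other kernel code, assuming "xts(serpent)" as the requested mode (illustrative only, not taken from this patch):

	#include <linux/err.h>
	#include <crypto/skcipher.h>

	/* Allocate the wrapped cipher; with this module loaded the lookup
	 * resolves to the simd wrapper built around __xts(serpent). */
	static int example_alloc_serpent_xts(struct crypto_skcipher **out,
					     const u8 *key, unsigned int keylen)
	{
		struct crypto_skcipher *tfm;
		int err;

		tfm = crypto_alloc_skcipher("xts(serpent)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err) {
			crypto_free_skcipher(tfm);
			return err;
		}

		*out = tfm;
		return 0;
	}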

+ 115 - 403  arch/x86/crypto/serpent_avx_glue.c

@@ -24,21 +24,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
 #include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
 
 /* 8-way parallel cipher functions */
 asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
@@ -91,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 EXPORT_SYMBOL_GPL(serpent_xts_dec);
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
+int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
+		       unsigned int keylen)
+{
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
+}
+EXPORT_SYMBOL_GPL(xts_serpent_setkey);
 
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 2,
@@ -170,423 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				     dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
-}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
+	return glue_xts_req_128bit(&serpent_enc_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
-							SERPENT_BLOCK_SIZE);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen -
-						SERPENT_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_xts_req_128bit(&serpent_dec_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-void lrw_serpent_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm);
-
-int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-EXPORT_SYMBOL_GPL(xts_serpent_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg serpent_algs[10] = { {
-	.cra_name		= "__ecb-serpent-avx",
-	.cra_driver_name	= "__driver-ecb-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-avx",
-	.cra_driver_name	= "__driver-cbc-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-avx",
-	.cra_driver_name	= "__driver-ctr-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-avx",
-	.cra_driver_name	= "__driver-lrw-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_serpent_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-avx",
-	.cra_driver_name	= "__driver-xts-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(serpent)",
+		.base.cra_driver_name	= "__xts-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= xts_serpent_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_init(void)
 {
@@ -598,12 +307,15 @@ static int __init serpent_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 
 static void __exit serpent_exit(void)
 {
-	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 
 module_init(serpent_init);
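Note: the handlers above now take a struct skcipher_request rather than a blkcipher_desc plus scatterlists. A sketch of driving the request-based interface with a synchronous tfm, in place over a linear buffer (buffer handling simplified; not code from this patch):

	#include <crypto/skcipher.h>
	#include <linux/scatterlist.h>

	static int example_encrypt_in_place(struct crypto_skcipher *tfm,
					    u8 *buf, unsigned int len, u8 *iv)
	{
		SKCIPHER_REQUEST_ON_STACK(req, tfm);
		struct scatterlist sg;
		int err;

		sg_init_one(&sg, buf, len);
		skcipher_request_set_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		/* Dispatches to the algorithm's ->encrypt(), e.g. cbc_encrypt() above. */
		err = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
		return err;
	}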

+ 69 - 450  arch/x86/crypto/serpent_sse2_glue.c

@@ -30,21 +30,22 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
-#include <crypto/serpent.h>
-#include <crypto/cryptd.h>
 #include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
+#include <crypto/serpent.h>
 #include <asm/crypto/serpent-sse2.h>
 #include <asm/crypto/glue_helper.h>
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
 static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
 {
 	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
@@ -139,464 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				     dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 
-static inline void serpent_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct serpent_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct serpent_ctx serpent_ctx;
-};
-
-static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
-							SERPENT_BLOCK_SIZE);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen -
-						SERPENT_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-
-struct serpent_xts_ctx {
-	struct serpent_ctx tweak_ctx;
-	struct serpent_ctx crypt_ctx;
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	},
 };
 
-static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static struct crypto_alg serpent_algs[10] = { {
-	.cra_name		= "__ecb-serpent-sse2",
-	.cra_driver_name	= "__driver-ecb-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-sse2",
-	.cra_driver_name	= "__driver-cbc-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-sse2",
-	.cra_driver_name	= "__driver-ctr-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-sse2",
-	.cra_driver_name	= "__driver-lrw-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-sse2",
-	.cra_driver_name	= "__driver-xts-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-} };
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 static int __init serpent_sse2_init(void)
 {
@@ -605,12 +221,15 @@ static int __init serpent_sse2_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 
 static void __exit serpent_sse2_exit(void)
 {
-	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 
 module_init(serpent_sse2_init);
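Note: with this conversion the three serpent glue modules expose the same algorithm names at different priorities (SSE2 = 400, AVX = 500, AVX2 = 600), so a lookup by "ecb(serpent)" picks the fastest implementation the CPU supports. A specific backend can still be pinned by driver name; a sketch, assuming "ecb-serpent-sse2" is the wrapper generated for __ecb-serpent-sse2 above:

	/* Bypass priority-based selection by naming the driver directly. */
	struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb-serpent-sse2", 0, 0);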

+ 3 - 25  arch/x86/crypto/sha1-mb/sha1_mb.c

@@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
 						(struct sha1_mb_mgr *state);
 
-static inline void sha1_init_digest(uint32_t *digest)
-{
-	static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
-					SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 {
@@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
 					  uint32_t len,
 					  int flags)
 {
-	if (flags & (~HASH_ENTIRE)) {
-		/*
-		 * User should not pass anything other than FIRST, UPDATE, or
-		 * LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		return ctx;
 	}
@@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
 		return ctx;
 	}
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		return ctx;
 	}
 
-
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha1_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/*
 	 * If we made it here, there were no errors during this call to
 	 * submit

+ 3 - 5  arch/x86/crypto/sha1-mb/sha1_mb_ctx.h

@@ -57,11 +57,9 @@
 #include "sha1_mb_mgr.h"
 
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE	     0x04
-#define HASH_FINAL	     0x08
+#define HASH_LAST            0x01
+#define HASH_DONE	     0x02
+#define HASH_FINAL	     0x04
 
 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01
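Note: with HASH_FIRST removed, the remaining flag values are renumbered here and in the sha256/sha512 headers below. The digest seeding and counter reset that HASH_FIRST used to perform are handled in the driver's init path instead, roughly equivalent to this sketch (field names follow the existing sha1_hash_ctx layout; not new code from this patch):

	static void sha1_mb_seed_ctx(struct sha1_hash_ctx *ctx)
	{
		static const u32 h[] = {
			SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
		};

		memcpy(ctx->job.result_digest, h, sizeof(h));	/* init digest */
		ctx->total_length = 0;				/* reset byte counter */
		ctx->partial_block_buffer_length = 0;		/* clear extra blocks */
	}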

+ 3 - 24  arch/x86/crypto/sha256-mb/sha256_mb.c

@@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
 static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
 			(struct sha256_mb_mgr *state);
 
-inline void sha256_init_digest(uint32_t *digest)
-{
-	static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = {
-				SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7};
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 {
@@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
 					  uint32_t len,
 					  int flags)
 {
-	if (flags & (~HASH_ENTIRE)) {
-		/* User should not pass anything other than FIRST, UPDATE
-		 * or LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		return ctx;
 	}
@@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
 		return ctx;
 	}
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		return ctx;
 	}
 
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha256_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/* If we made it here, there was no error during this call to submit */
 	ctx->error = HASH_CTX_ERROR_NONE;
 

+ 3 - 5  arch/x86/crypto/sha256-mb/sha256_mb_ctx.h

@@ -57,11 +57,9 @@
 #include "sha256_mb_mgr.h"
 
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE	     0x04
-#define HASH_FINAL	     0x08
+#define HASH_LAST            0x01
+#define HASH_DONE	     0x02
+#define HASH_FINAL	     0x04
 
 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01

+ 3 - 27  arch/x86/crypto/sha512-mb/sha512_mb.c

@@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
 static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
 						(struct sha512_mb_mgr *state);
 
-inline void sha512_init_digest(uint64_t *digest)
-{
-	static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = {
-					SHA512_H0, SHA512_H1, SHA512_H2,
-					SHA512_H3, SHA512_H4, SHA512_H5,
-					SHA512_H6, SHA512_H7 };
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 {
@@ -263,11 +254,8 @@ static struct sha512_hash_ctx
 
 	mgr = cstate->mgr;
 	spin_lock_irqsave(&cstate->work_lock, irqflags);
-	if (flags & (~HASH_ENTIRE)) {
-		/*
-		 * User should not pass anything other than FIRST, UPDATE, or
-		 * LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		goto unlock;
 	}
@@ -278,24 +266,12 @@ static struct sha512_hash_ctx
 		goto unlock;
 	}
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		goto unlock;
 	}
 
-
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha512_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/*
 	 * If we made it here, there were no errors during this call to
 	 * submit

+ 3 - 5  arch/x86/crypto/sha512-mb/sha512_mb_ctx.h

@@ -57,11 +57,9 @@
 #include "sha512_mb_mgr.h"
 
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE            0x04
-#define HASH_FINAL           0x08
+#define HASH_LAST            0x01
+#define HASH_DONE            0x02
+#define HASH_FINAL           0x04
 
 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01

+ 121 - 372  arch/x86/crypto/twofish_avx_glue.c

@@ -24,24 +24,15 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <crypto/twofish.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/twofish.h>
 #include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
+#include <asm/crypto/twofish.h>
 
 #define TWOFISH_PARALLEL_BLOCKS 8
 
@@ -61,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
 asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src, le128 *iv);
 
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return twofish_setkey(&tfm->base, key, keylen);
+}
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
 {
@@ -79,6 +76,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 				  GLUE_FUNC_CAST(twofish_dec_blk));
 }
 
+struct twofish_xts_ctx {
+	struct twofish_ctx tweak_ctx;
+	struct twofish_ctx crypt_ctx;
+};
+
+static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+				flags);
+}
 
 static const struct common_glue_ctx twofish_enc = {
 	.num_funcs = 3,
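Note: unlike the serpent glue, twofish's low-level setkey reports key problems through the tfm's crt_flags, which is why the wrappers above pass &tfm->base and tfm->base.crt_flags through. A hedged sketch of checking that from a caller (flag name as defined at the time; not code from this patch):

	#include <crypto/skcipher.h>

	static int example_twofish_setkey(struct crypto_skcipher *tfm,
					  const u8 *key, unsigned int keylen)
	{
		int err = crypto_skcipher_setkey(tfm, key, keylen);

		if (err && (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_RES_BAD_KEY_LEN))
			pr_warn("twofish: unsupported key length %u\n", keylen);
		return err;
	}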
@@ -170,389 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
-				       dst, src, nbytes);
+	return glue_ecb_req_128bit(&twofish_enc, req);
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&twofish_dec, req);
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+					   req);
 }
 
-static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
-			      fpu_enabled, nbytes);
+	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 }
 
-static inline void twofish_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&twofish_ctr, req);
 }
 
-struct crypt_priv {
-	struct twofish_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
-		twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
-		twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	nbytes %= bsize * 3;
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&twofish_enc_xts, req,
+				   XTS_TWEAK_CAST(twofish_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
-		twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
-		twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
-
-	nbytes %= bsize * 3;
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&twofish_dec_xts, req,
+				   XTS_TWEAK_CAST(twofish_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[TWOFISH_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->twofish_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[TWOFISH_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->twofish_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(twofish_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(twofish_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg twofish_algs[10] = { {
-	.cra_name		= "__ecb-twofish-avx",
-	.cra_driver_name	= "__driver-ecb-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-twofish-avx",
-	.cra_driver_name	= "__driver-cbc-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-twofish-avx",
-	.cra_driver_name	= "__driver-ctr-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-twofish-avx",
-	.cra_driver_name	= "__driver-lrw-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_twofish_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-twofish-avx",
-	.cra_driver_name	= "__driver-xts-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= xts_twofish_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(twofish)",
-	.cra_driver_name	= "xts-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg twofish_algs[] = {
+	{
+		.base.cra_name		= "__ecb(twofish)",
+		.base.cra_driver_name	= "__ecb-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(twofish)",
+		.base.cra_driver_name	= "__cbc-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(twofish)",
+		.base.cra_driver_name	= "__ctr-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.chunksize		= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(twofish)",
+		.base.cra_driver_name	= "__xts-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * TF_MIN_KEY_SIZE,
+		.max_keysize		= 2 * TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= xts_twofish_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
 
 static int __init twofish_init(void)
 {
@@ -563,12 +309,15 @@ static int __init twofish_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+	return simd_register_skciphers_compat(twofish_algs,
+					      ARRAY_SIZE(twofish_algs),
+					      twofish_simd_algs);
 }
 
 static void __exit twofish_exit(void)
 {
-	crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+	simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs),
+				  twofish_simd_algs);
 }
 
 module_init(twofish_init);
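With this conversion, callers reach the AVX code through the ordinary skcipher API: the simd wrappers registered above expose "ecb(twofish)", "cbc(twofish)", "ctr(twofish)" and "xts(twofish)" at priority 400 and fall back to the internal "__"-prefixed algorithms only when the FPU is usable. A minimal sketch of exercising the result, not part of the patch — the algorithm name and the 2 * TF_MAX_KEY_SIZE key length follow the table above, while the function name and key contents are illustrative:

#include <crypto/skcipher.h>
#include <crypto/twofish.h>
#include <linux/err.h>
#include <linux/random.h>

static int twofish_xts_smoke_test(void)
{
	struct crypto_skcipher *tfm;
	u8 key[2 * TF_MAX_KEY_SIZE];	/* xts() takes two full-size keys */
	int err;

	tfm = crypto_alloc_skcipher("xts(twofish)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	get_random_bytes(key, sizeof(key));
	err = crypto_skcipher_setkey(tfm, key, sizeof(key));

	crypto_free_skcipher(tfm);
	return err;
}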

+ 67 - 272
arch/x86/crypto/twofish_glue_3way.c

@@ -20,22 +20,26 @@
  *
  */
 
-#include <asm/processor.h>
+#include <asm/crypto/glue_helper.h>
+#include <asm/crypto/twofish.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/twofish.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/twofish.h>
-#include <crypto/b128ops.h>
-#include <asm/crypto/twofish.h>
-#include <asm/crypto/glue_helper.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
 
 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
 
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return twofish_setkey(&tfm->base, key, keylen);
+}
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
 {
@@ -151,284 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct twofish_ctx *ctx = priv;
-	int i;
-
-	if (nbytes == 3 * bsize) {
-		twofish_enc_blk_3way(ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct twofish_ctx *ctx = priv;
-	int i;
-
-	if (nbytes == 3 * bsize) {
-		twofish_dec_blk_3way(ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
-			       &tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
+	return glue_ecb_req_128bit(&twofish_enc, req);
 }
-EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[3];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->twofish_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_ecb_req_128bit(&twofish_dec, req);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[3];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->twofish_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+					   req);
 }
 
-void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
-
-int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
-}
-EXPORT_SYMBOL_GPL(xts_twofish_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[3];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[3];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_ctr_req_128bit(&twofish_ctr, req);
 }
 
-static struct crypto_alg tf_algs[5] = { {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_twofish_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(twofish)",
-	.cra_driver_name	= "xts-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= xts_twofish_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
+static struct skcipher_alg tf_skciphers[] = {
+	{
+		.base.cra_name		= "ecb(twofish)",
+		.base.cra_driver_name	= "ecb-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(twofish)",
+		.base.cra_driver_name	= "cbc-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(twofish)",
+		.base.cra_driver_name	= "ctr-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.chunksize		= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
 	},
-} };
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -478,12 +272,13 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
+	return crypto_register_skciphers(tf_skciphers,
+					 ARRAY_SIZE(tf_skciphers));
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
+	crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
 }
 
 module_init(init);

+ 5 - 11
arch/x86/include/asm/crypto/camellia.h

@@ -2,8 +2,9 @@
 #ifndef ASM_X86_CAMELLIA_H
 #define ASM_X86_CAMELLIA_H
 
-#include <linux/kernel.h>
+#include <crypto/b128ops.h>
 #include <linux/crypto.h>
+#include <linux/kernel.h>
 
 #define CAMELLIA_MIN_KEY_SIZE	16
 #define CAMELLIA_MAX_KEY_SIZE	32
@@ -11,16 +12,13 @@
 #define CAMELLIA_TABLE_BYTE_LEN	272
 #define CAMELLIA_PARALLEL_BLOCKS 2
 
+struct crypto_skcipher;
+
 struct camellia_ctx {
 	u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
 	u32 key_length;
 };
 
-struct camellia_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct camellia_ctx camellia_ctx;
-};
-
 struct camellia_xts_ctx {
 	struct camellia_ctx tweak_ctx;
 	struct camellia_ctx crypt_ctx;
@@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx,
 			     const unsigned char *key,
 			     unsigned int key_len, u32 *flags);
 
-extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			       unsigned int keylen);
-extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			       unsigned int keylen);
 
 /* regular block cipher functions */

+ 12 - 63
arch/x86/include/asm/crypto/glue_helper.h

@@ -45,7 +45,7 @@ struct common_glue_ctx {
 };
 
 static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
-				  struct blkcipher_desc *desc,
+				  struct skcipher_walk *walk,
 				  bool fpu_enabled, unsigned int nbytes)
 {
 	if (likely(fpu_blocks_limit < 0))
@@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
 	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
 		return false;
 
-	if (desc) {
-		/* prevent sleeping if FPU is in use */
-		desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	}
-
-	kernel_fpu_begin();
-	return true;
-}
-
-static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
-					 int fpu_blocks_limit,
-					 struct skcipher_walk *walk,
-					 bool fpu_enabled, unsigned int nbytes)
-{
-	if (likely(fpu_blocks_limit < 0))
-		return false;
-
-	if (fpu_enabled)
-		return true;
-
-	/*
-	 * Vector-registers are only used when chunk to be processed is large
-	 * enough, so do not enable FPU until it is necessary.
-	 */
-	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
-		return false;
-
 	/* prevent sleeping if FPU is in use */
 	skcipher_walk_atomise(walk);
 
@@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i)
 	i->b = cpu_to_le64(b);
 }
 
-extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-				   struct blkcipher_desc *desc,
-				   struct scatterlist *dst,
-				   struct scatterlist *src,
-				   unsigned int nbytes);
-
-extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-				   struct blkcipher_desc *desc,
-				   struct scatterlist *dst,
-				   struct scatterlist *src,
-				   unsigned int nbytes);
-
-extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes,
-				 common_glue_func_t tweak_fn, void *tweak_ctx,
-				 void *crypt_ctx);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes,
-				 common_glue_func_t tweak_fn, void *tweak_ctx,
-				 void *crypt_ctx);
+extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req);
+
+extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+				       struct skcipher_request *req);
+
+extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+				       struct skcipher_request *req);
+
+extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req);
 
 extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			       struct skcipher_request *req,

+ 5 - 12
arch/x86/include/asm/crypto/serpent-avx.h

@@ -2,15 +2,13 @@
 #ifndef ASM_X86_SERPENT_AVX_H
 #define ASM_X86_SERPENT_AVX_H
 
-#include <linux/crypto.h>
+#include <crypto/b128ops.h>
 #include <crypto/serpent.h>
+#include <linux/types.h>
 
-#define SERPENT_PARALLEL_BLOCKS 8
+struct crypto_skcipher;
 
-struct serpent_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct serpent_ctx serpent_ctx;
-};
+#define SERPENT_PARALLEL_BLOCKS 8
 
 struct serpent_xts_ctx {
 	struct serpent_ctx tweak_ctx;
@@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
 extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
 extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
 
-extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
-extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int keylen);
 
 #endif

+ 0 - 19
arch/x86/include/asm/crypto/twofish.h

@@ -4,19 +4,8 @@
 
 #include <linux/crypto.h>
 #include <crypto/twofish.h>
-#include <crypto/lrw.h>
 #include <crypto/b128ops.h>
 
-struct twofish_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct twofish_ctx twofish_ctx;
-};
-
-struct twofish_xts_ctx {
-	struct twofish_ctx tweak_ctx;
-	struct twofish_ctx crypt_ctx;
-};
-
 /* regular block cipher functions from twofish_x86_64 module */
 asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
 				const u8 *src);
@@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
 extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
 				     le128 *iv);
 
-extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
-extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
 #endif /* ASM_X86_TWOFISH_H */

+ 69 - 60
crypto/Kconfig

@@ -245,10 +245,6 @@ config CRYPTO_TEST
 	help
 	  Quick & dirty crypto test module.
 
-config CRYPTO_ABLK_HELPER
-	tristate
-	select CRYPTO_CRYPTD
-
 config CRYPTO_SIMD
 	tristate
 	select CRYPTO_CRYPTD
@@ -324,6 +320,14 @@ config CRYPTO_CBC
 	  CBC: Cipher Block Chaining mode
 	  This block cipher algorithm is required for IPSec.
 
+config CRYPTO_CFB
+	tristate "CFB support"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_MANAGER
+	help
+	  CFB: Cipher FeedBack mode
+	  This block cipher algorithm is required for TPM2 Cryptography.
+
 config CRYPTO_CTR
 	tristate "CTR support"
 	select CRYPTO_BLKCIPHER
@@ -1114,7 +1118,7 @@ config CRYPTO_BLOWFISH_COMMON
 config CRYPTO_BLOWFISH_X86_64
 	tristate "Blowfish cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_BLOWFISH_COMMON
 	help
 	  Blowfish cipher algorithm (x86_64), by Bruce Schneier.
@@ -1145,10 +1149,8 @@ config CRYPTO_CAMELLIA_X86_64
 	tristate "Camellia cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Camellia cipher algorithm module (x86_64).
 
@@ -1164,12 +1166,10 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAMELLIA_X86_64
-	select CRYPTO_LRW
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX).
@@ -1186,14 +1186,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
 	depends on X86 && 64BIT
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_CAMELLIA_X86_64
 	select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX2).
 
@@ -1238,11 +1231,10 @@ config CRYPTO_CAST5
 config CRYPTO_CAST5_AVX_X86_64
 	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_CAST_COMMON
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAST5
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_SIMD
 	help
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
@@ -1261,13 +1253,11 @@ config CRYPTO_CAST6
 config CRYPTO_CAST6_AVX_X86_64
 	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_CAST_COMMON
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAST6
-	select CRYPTO_LRW
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
@@ -1294,7 +1284,7 @@ config CRYPTO_DES_SPARC64
 config CRYPTO_DES3_EDE_X86_64
 	tristate "Triple DES EDE cipher algorithm (x86-64)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_DES
 	help
 	  Triple DES EDE (FIPS 46-3) algorithm.
@@ -1422,13 +1412,10 @@ config CRYPTO_SERPENT
 config CRYPTO_SERPENT_SSE2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/SSE2)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1444,13 +1431,10 @@ config CRYPTO_SERPENT_SSE2_X86_64
 config CRYPTO_SERPENT_SSE2_586
 	tristate "Serpent cipher algorithm (i586/SSE2)"
 	depends on X86 && !64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1466,12 +1450,10 @@ config CRYPTO_SERPENT_SSE2_586
 config CRYPTO_SERPENT_AVX_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
@@ -1488,14 +1470,7 @@ config CRYPTO_SERPENT_AVX_X86_64
 config CRYPTO_SERPENT_AVX2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX2)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_SERPENT
 	select CRYPTO_SERPENT_AVX_X86_64
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1508,6 +1483,45 @@ config CRYPTO_SERPENT_AVX2_X86_64
 	  See also:
 	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
 
+config CRYPTO_SM4
+	tristate "SM4 cipher algorithm"
+	select CRYPTO_ALGAPI
+	help
+	  SM4 cipher algorithms (OSCCA GB/T 32907-2016).
+
+	  SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+	  Organization of State Commercial Administration of China (OSCCA)
+	  as an authorized cryptographic algorithm for use within China.
+
+	  SMS4 was originally created for use in protecting wireless
+	  networks, and is mandated in the Chinese National Standard for
+	  Wireless LAN WAPI (Wired Authentication and Privacy Infrastructure)
+	  (GB.15629.11-2003).
+
+	  The latest SM4 standard (GBT.32907-2016) was proposed by OSCCA and
+	  standardized through TC 260 of the Standardization Administration
+	  of the People's Republic of China (SAC).
+
+	  The input, output, and key of SMS4 are each 128 bits.
+
+	  See also: <https://eprint.iacr.org/2008/329.pdf>
+
+	  If unsure, say N.
+
+config CRYPTO_SPECK
+	tristate "Speck cipher algorithm"
+	select CRYPTO_ALGAPI
+	help
+	  Speck is a lightweight block cipher that is tuned for optimal
+	  performance in software (rather than hardware).
+
+	  Speck may not be as secure as AES, and should only be used on systems
+	  where AES is not fast enough.
+
+	  See also: <https://eprint.iacr.org/2013/404.pdf>
+
+	  If unsure, say N.
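Both new entries select only CRYPTO_ALGAPI, i.e. they register plain single-block ciphers; chaining modes such as ecb() or xts() are composed around them by templates. A short sketch, not from the patch, of encrypting one 16-byte block with the generic SM4 cipher — the "sm4" name matches the entry above, the key/data values and function name are illustrative:

#include <linux/crypto.h>
#include <linux/err.h>

static int sm4_one_block(const u8 key[16], const u8 in[16], u8 out[16])
{
	struct crypto_cipher *tfm;
	int err;

	tfm = crypto_alloc_cipher("sm4", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, 16);	/* SM4 uses a 128-bit key */
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in);

	crypto_free_cipher(tfm);
	return err;
}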
+
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1581,12 +1595,10 @@ config CRYPTO_TWOFISH_X86_64
 config CRYPTO_TWOFISH_X86_64_3WAY
 	tristate "Twofish cipher algorithm (x86_64, 3-way parallel)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Twofish cipher algorithm (x86_64, 3-way parallel).
 
@@ -1604,15 +1616,12 @@ config CRYPTO_TWOFISH_X86_64_3WAY
 config CRYPTO_TWOFISH_AVX_X86_64
 	tristate "Twofish cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_TWOFISH_X86_64_3WAY
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Twofish cipher algorithm (x86_64/AVX).
 

+ 3 - 1
crypto/Makefile

@@ -78,6 +78,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
 obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
 obj-$(CONFIG_CRYPTO_ECB) += ecb.o
 obj-$(CONFIG_CRYPTO_CBC) += cbc.o
+obj-$(CONFIG_CRYPTO_CFB) += cfb.o
 obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
@@ -100,6 +101,7 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
 CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
+obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
@@ -110,6 +112,7 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
+obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
@@ -149,6 +152,5 @@ obj-$(CONFIG_XOR_BLOCKS) += xor.o
 obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
 obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
-obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
 crypto_simd-y := simd.o
 obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o

+ 0 - 150
crypto/ablk_helper.c

@@ -1,150 +0,0 @@
-/*
- * Shared async block cipher helpers
- *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Based on aesni-intel_glue.c by:
- *  Copyright (C) 2008, Intel Corp.
- *    Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <crypto/algapi.h>
-#include <crypto/cryptd.h>
-#include <crypto/ablk_helper.h>
-#include <asm/simd.h>
-
-int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-		 unsigned int key_len)
-{
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-EXPORT_SYMBOL_GPL(ablk_set_key);
-
-int __ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct blkcipher_desc desc;
-
-	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-	desc.info = req->info;
-	desc.flags = 0;
-
-	return crypto_blkcipher_crt(desc.tfm)->encrypt(
-		&desc, req->dst, req->src, req->nbytes);
-}
-EXPORT_SYMBOL_GPL(__ablk_encrypt);
-
-int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!may_use_simd() ||
-	    (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		*cryptd_req = *req;
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		return __ablk_encrypt(req);
-	}
-}
-EXPORT_SYMBOL_GPL(ablk_encrypt);
-
-int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!may_use_simd() ||
-	    (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		*cryptd_req = *req;
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-EXPORT_SYMBOL_GPL(ablk_decrypt);
-
-void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-EXPORT_SYMBOL_GPL(ablk_exit);
-
-int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
-					     CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ablk_init_common);
-
-int ablk_init(struct crypto_tfm *tfm)
-{
-	char drv_name[CRYPTO_MAX_ALG_NAME];
-
-	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
-					crypto_tfm_alg_driver_name(tfm));
-
-	return ablk_init_common(tfm, drv_name);
-}
-EXPORT_SYMBOL_GPL(ablk_init);
-
-MODULE_LICENSE("GPL");

+ 6 - 19
crypto/ahash.c

@@ -92,13 +92,14 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err)
 
 	if (nbytes && walk->offset & alignmask && !err) {
 		walk->offset = ALIGN(walk->offset, alignmask + 1);
-		walk->data += walk->offset;
-
 		nbytes = min(nbytes,
 			     ((unsigned int)(PAGE_SIZE)) - walk->offset);
 		walk->entrylen -= nbytes;
 
-		return nbytes;
+		if (nbytes) {
+			walk->data += walk->offset;
+			return nbytes;
+		}
 	}
 
 	if (walk->flags & CRYPTO_ALG_ASYNC)
@@ -446,24 +447,12 @@ static int ahash_def_finup(struct ahash_request *req)
 	return ahash_def_finup_finish1(req, err);
 }
 
-static int ahash_no_export(struct ahash_request *req, void *out)
-{
-	return -ENOSYS;
-}
-
-static int ahash_no_import(struct ahash_request *req, const void *in)
-{
-	return -ENOSYS;
-}
-
 static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_ahash *hash = __crypto_ahash_cast(tfm);
 	struct ahash_alg *alg = crypto_ahash_alg(hash);
 
 	hash->setkey = ahash_nosetkey;
-	hash->export = ahash_no_export;
-	hash->import = ahash_no_import;
 
 	if (tfm->__crt_alg->cra_type != &crypto_ahash_type)
 		return crypto_init_shash_ops_async(tfm);
@@ -473,16 +462,14 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 	hash->final = alg->final;
 	hash->finup = alg->finup ?: ahash_def_finup;
 	hash->digest = alg->digest;
+	hash->export = alg->export;
+	hash->import = alg->import;
 
 	if (alg->setkey) {
 		hash->setkey = alg->setkey;
 		if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
 			crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 	}
-	if (alg->export)
-		hash->export = alg->export;
-	if (alg->import)
-		hash->import = alg->import;
 
 	return 0;
 }

+ 0 - 8
crypto/algapi.c

@@ -543,9 +543,6 @@ int crypto_register_instance(struct crypto_template *tmpl,
 	inst->alg.cra_module = tmpl->module;
 	inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
 
-	if (unlikely(!crypto_mod_get(&inst->alg)))
-		return -EAGAIN;
-
 	down_write(&crypto_alg_sem);
 
 	larval = __crypto_register_alg(&inst->alg);
@@ -563,14 +560,9 @@ unlock:
 		goto err;
 
 	crypto_wait_for_test(larval);
-
-	/* Remove instance if test failed */
-	if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED))
-		crypto_unregister_instance(inst);
 	err = 0;
 
 err:
-	crypto_mod_put(&inst->alg);
 	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);

+ 16 - 18
crypto/api.c

@@ -193,17 +193,24 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	return alg;
 }
 
-struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask)
+static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
+					    u32 mask)
 {
 	struct crypto_alg *alg;
+	u32 test = 0;
+
+	if (!((type | mask) & CRYPTO_ALG_TESTED))
+		test |= CRYPTO_ALG_TESTED;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name, type, mask);
+	alg = __crypto_alg_lookup(name, type | test, mask | test);
+	if (!alg && test)
+		alg = __crypto_alg_lookup(name, type, mask) ?
+		      ERR_PTR(-ELIBBAD) : NULL;
 	up_read(&crypto_alg_sem);
 
 	return alg;
 }
-EXPORT_SYMBOL_GPL(crypto_alg_lookup);
 
 static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 					       u32 mask)
@@ -227,10 +234,12 @@ static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 		alg = crypto_alg_lookup(name, type, mask);
 	}
 
-	if (alg)
-		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
+	if (!IS_ERR_OR_NULL(alg) && crypto_is_larval(alg))
+		alg = crypto_larval_wait(alg);
+	else if (!alg)
+		alg = crypto_larval_add(name, type, mask);
 
-	return crypto_larval_add(name, type, mask);
+	return alg;
 }
 
 int crypto_probing_notify(unsigned long val, void *v)
@@ -253,11 +262,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 	struct crypto_alg *larval;
 	int ok;
 
-	if (!((type | mask) & CRYPTO_ALG_TESTED)) {
-		type |= CRYPTO_ALG_TESTED;
-		mask |= CRYPTO_ALG_TESTED;
-	}
-
 	/*
 	 * If the internal flag is set for a cipher, require a caller to
 	 * to invoke the cipher with the internal flag to use that cipher.
@@ -485,20 +489,14 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 				   const struct crypto_type *frontend,
 				   u32 type, u32 mask)
 {
-	struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask) =
-		crypto_alg_mod_lookup;
-
 	if (frontend) {
 		type &= frontend->maskclear;
 		mask &= frontend->maskclear;
 		type |= frontend->type;
 		mask |= frontend->maskset;
-
-		if (frontend->lookup)
-			lookup = frontend->lookup;
 	}
 
-	return lookup(alg_name, type, mask);
+	return crypto_alg_mod_lookup(alg_name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_find_alg);
 

+ 353 - 0
crypto/cfb.c

@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CFB: Cipher FeedBack mode
+ *
+ * Copyright (c) 2018 James.Bottomley@HansenPartnership.com
+ *
+ * CFB is a stream cipher mode which is layered on to a block
+ * encryption scheme.  It works very much like a one time pad where
+ * the pad is generated initially from the encrypted IV and then
+ * subsequently from the encrypted previous block of ciphertext.  The
+ * pad is XOR'd into the plain text to get the final ciphertext.
+ *
+ * The scheme of CFB is best described by wikipedia:
+ *
+ * https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#CFB
+ *
+ * Note that since the pad for both encryption and decryption is
+ * generated by an encryption operation, CFB never uses the block
+ * decryption function.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+struct crypto_cfb_ctx {
+	struct crypto_cipher *child;
+};
+
+static unsigned int crypto_cfb_bsize(struct crypto_skcipher *tfm)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	return crypto_cipher_blocksize(child);
+}
+
+static void crypto_cfb_encrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_cipher_encrypt_one(ctx->child, dst, src);
+}
+
+/* the final encrypt and decrypt steps are the same */
+static void crypto_cfb_final(struct skcipher_walk *walk,
+			     struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	const unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	u8 tmp[bsize + alignmask];
+	u8 *stream = PTR_ALIGN(tmp + 0, alignmask + 1);
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+	unsigned int nbytes = walk->nbytes;
+
+	crypto_cfb_encrypt_one(tfm, iv, stream);
+	crypto_xor_cpy(dst, stream, src, nbytes);
+}
+
+static int crypto_cfb_encrypt_segment(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, dst);
+		crypto_xor(dst, src, bsize);
+		memcpy(iv, dst, bsize);
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_encrypt_inplace(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+	u8 tmp[bsize];
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, tmp);
+		crypto_xor(src, tmp, bsize);
+		iv = src;
+
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	unsigned int bsize = crypto_cfb_bsize(tfm);
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= bsize) {
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			err = crypto_cfb_encrypt_inplace(&walk, tfm);
+		else
+			err = crypto_cfb_encrypt_segment(&walk, tfm);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	if (walk.nbytes) {
+		crypto_cfb_final(&walk, tfm);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_cfb_decrypt_segment(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, dst);
+		crypto_xor(dst, src, bsize);
+		iv = src;
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_decrypt_inplace(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+	u8 tmp[bsize];
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, tmp);
+		memcpy(iv, src, bsize);
+		crypto_xor(src, tmp, bsize);
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_decrypt_blocks(struct skcipher_walk *walk,
+				     struct crypto_skcipher *tfm)
+{
+	if (walk->src.virt.addr == walk->dst.virt.addr)
+		return crypto_cfb_decrypt_inplace(walk, tfm);
+	else
+		return crypto_cfb_decrypt_segment(walk, tfm);
+}
+
+static int crypto_cfb_setkey(struct crypto_skcipher *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_cfb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= bsize) {
+		err = crypto_cfb_decrypt_blocks(&walk, tfm);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	if (walk.nbytes) {
+		crypto_cfb_final(&walk, tfm);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_cfb_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_cfb_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->child);
+}
+
+static void crypto_cfb_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
+	struct crypto_alg *alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
+	if (err)
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		goto err_free_inst;
+
+	mask = CRYPTO_ALG_TYPE_MASK |
+		crypto_requires_off(algt->type, algt->mask,
+				    CRYPTO_ALG_NEED_FALLBACK);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+	err = PTR_ERR(alg);
+	if (IS_ERR(alg))
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cfb", alg);
+	if (err)
+		goto err_drop_spawn;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	/* we're a stream cipher independent of the crypto cra_blocksize */
+	inst->alg.base.cra_blocksize = 1;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
+
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_cfb_ctx);
+
+	inst->alg.init = crypto_cfb_init_tfm;
+	inst->alg.exit = crypto_cfb_exit_tfm;
+
+	inst->alg.setkey = crypto_cfb_setkey;
+	inst->alg.encrypt = crypto_cfb_encrypt;
+	inst->alg.decrypt = crypto_cfb_decrypt;
+
+	inst->free = crypto_cfb_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
+	kfree(inst);
+	goto out;
+}
+
+static struct crypto_template crypto_cfb_tmpl = {
+	.name = "cfb",
+	.create = crypto_cfb_create,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_cfb_module_init(void)
+{
+	return crypto_register_template(&crypto_cfb_tmpl);
+}
+
+static void __exit crypto_cfb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_cfb_tmpl);
+}
+
+module_init(crypto_cfb_module_init);
+module_exit(crypto_cfb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CFB block cipher algorithm");
+MODULE_ALIAS_CRYPTO("cfb");

+ 164 - 137
crypto/crypto_engine.c

@@ -15,12 +15,49 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <crypto/engine.h>
-#include <crypto/internal/hash.h>
 #include <uapi/linux/sched/types.h>
 #include "internal.h"
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
+/**
+ * crypto_finalize_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request to be finalized
+ * @err: error number
+ */
+static void crypto_finalize_request(struct crypto_engine *engine,
+			     struct crypto_async_request *req, int err)
+{
+	unsigned long flags;
+	bool finalize_cur_req = false;
+	int ret;
+	struct crypto_engine_ctx *enginectx;
+
+	spin_lock_irqsave(&engine->queue_lock, flags);
+	if (engine->cur_req == req)
+		finalize_cur_req = true;
+	spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+	if (finalize_cur_req) {
+		enginectx = crypto_tfm_ctx(req->tfm);
+		if (engine->cur_req_prepared &&
+		    enginectx->op.unprepare_request) {
+			ret = enginectx->op.unprepare_request(engine, req);
+			if (ret)
+				dev_err(engine->dev, "failed to unprepare request\n");
+		}
+		spin_lock_irqsave(&engine->queue_lock, flags);
+		engine->cur_req = NULL;
+		engine->cur_req_prepared = false;
+		spin_unlock_irqrestore(&engine->queue_lock, flags);
+	}
+
+	req->complete(req, err);
+
+	kthread_queue_work(engine->kworker, &engine->pump_requests);
+}
+
 /**
  * crypto_pump_requests - dequeue one request from engine queue to process
  * @engine: the hardware engine
@@ -34,11 +71,10 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 				 bool in_kthread)
 {
 	struct crypto_async_request *async_req, *backlog;
-	struct ahash_request *hreq;
-	struct ablkcipher_request *breq;
 	unsigned long flags;
 	bool was_busy = false;
-	int ret, rtype;
+	int ret;
+	struct crypto_engine_ctx *enginectx;
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 
@@ -94,7 +130,6 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
-	rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
 	/* At this point we have successfully dequeued the request to be processed */
 	if (!was_busy && engine->prepare_crypt_hardware) {
 		ret = engine->prepare_crypt_hardware(engine);
@@ -104,57 +139,31 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		}
 	}
 
-	switch (rtype) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		hreq = ahash_request_cast(engine->cur_req);
-		if (engine->prepare_hash_request) {
-			ret = engine->prepare_hash_request(engine, hreq);
-			if (ret) {
-				dev_err(engine->dev, "failed to prepare request: %d\n",
-					ret);
-				goto req_err;
-			}
-			engine->cur_req_prepared = true;
-		}
-		ret = engine->hash_one_request(engine, hreq);
-		if (ret) {
-			dev_err(engine->dev, "failed to hash one request from queue\n");
-			goto req_err;
-		}
-		return;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		breq = ablkcipher_request_cast(engine->cur_req);
-		if (engine->prepare_cipher_request) {
-			ret = engine->prepare_cipher_request(engine, breq);
-			if (ret) {
-				dev_err(engine->dev, "failed to prepare request: %d\n",
-					ret);
-				goto req_err;
-			}
-			engine->cur_req_prepared = true;
-		}
-		ret = engine->cipher_one_request(engine, breq);
+	enginectx = crypto_tfm_ctx(async_req->tfm);
+
+	if (enginectx->op.prepare_request) {
+		ret = enginectx->op.prepare_request(engine, async_req);
 		if (ret) {
-			dev_err(engine->dev, "failed to cipher one request from queue\n");
+			dev_err(engine->dev, "failed to prepare request: %d\n",
+				ret);
 			goto req_err;
 		}
-		return;
-	default:
-		dev_err(engine->dev, "failed to prepare request of unknown type\n");
-		return;
+		engine->cur_req_prepared = true;
+	}
+	if (!enginectx->op.do_one_request) {
+		dev_err(engine->dev, "failed to do request\n");
+		ret = -EINVAL;
+		goto req_err;
 	}
+	ret = enginectx->op.do_one_request(engine, async_req);
+	if (ret) {
+		dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret);
+		goto req_err;
+	}
+	return;
 
 req_err:
-	switch (rtype) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		hreq = ahash_request_cast(engine->cur_req);
-		crypto_finalize_hash_request(engine, hreq, ret);
-		break;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		breq = ablkcipher_request_cast(engine->cur_req);
-		crypto_finalize_cipher_request(engine, breq, ret);
-		break;
-	}
+	crypto_finalize_request(engine, async_req, ret);
 	return;
 
 out:
@@ -170,13 +179,12 @@ static void crypto_pump_work(struct kthread_work *work)
 }
 
 /**
- * crypto_transfer_cipher_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_request - transfer the new request into the engine queue
  * @engine: the hardware engine
  * @req: the request to be listed in the engine queue
  */
-int crypto_transfer_cipher_request(struct crypto_engine *engine,
-				   struct ablkcipher_request *req,
+static int crypto_transfer_request(struct crypto_engine *engine,
+				   struct crypto_async_request *req,
 				   bool need_pump)
 {
 	unsigned long flags;
@@ -189,7 +197,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 		return -ESHUTDOWN;
 	}
 
-	ret = ablkcipher_enqueue_request(&engine->queue, req);
+	ret = crypto_enqueue_request(&engine->queue, req);
 
 	if (!engine->busy && need_pump)
 		kthread_queue_work(engine->kworker, &engine->pump_requests);
@@ -197,102 +205,131 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
 
 /**
- * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * crypto_transfer_request_to_engine - transfer one request
  * into the engine queue
  * @engine: the hardware engine
  * @req: the request to be listed in the engine queue
  */
-int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
-					     struct ablkcipher_request *req)
+static int crypto_transfer_request_to_engine(struct crypto_engine *engine,
+					     struct crypto_async_request *req)
 {
-	return crypto_transfer_cipher_request(engine, req, true);
+	return crypto_transfer_request(engine, req, true);
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
 
 /**
- * crypto_transfer_hash_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_ablkcipher_request_to_engine - transfer one ablkcipher_request
+ * into the engine queue
  * @engine: the hardware engine
  * @req: the request to be listed in the engine queue
+ * TODO: Remove this function when skcipher conversion is finished
  */
-int crypto_transfer_hash_request(struct crypto_engine *engine,
-				 struct ahash_request *req, bool need_pump)
+int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
+						 struct ablkcipher_request *req)
 {
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-
-	if (!engine->running) {
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-		return -ESHUTDOWN;
-	}
-
-	ret = ahash_enqueue_request(&engine->queue, req);
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_ablkcipher_request_to_engine);
 
-	if (!engine->busy && need_pump)
-		kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_transfer_aead_request_to_engine - transfer one aead_request
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request to be listed in the engine queue
+ */
+int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
+					   struct aead_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_aead_request_to_engine);
 
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-	return ret;
+/**
+ * crypto_transfer_akcipher_request_to_engine - transfer one akcipher_request
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request to be listed in the engine queue
+ */
+int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
+					       struct akcipher_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_akcipher_request_to_engine);
 
 /**
- * crypto_transfer_hash_request_to_engine - transfer one request to list
- * into the engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one ahash_request
+ * into the engine queue
  * @engine: the hardware engine
  * @req: the request to be listed in the engine queue
  */
 int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
 					   struct ahash_request *req)
 {
-	return crypto_transfer_hash_request(engine, req, true);
+	return crypto_transfer_request_to_engine(engine, &req->base);
 }
 EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 
 /**
- * crypto_finalize_cipher_request - finalize one request if the request is done
+ * crypto_transfer_skcipher_request_to_engine - transfer one skcipher_request
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request to be listed in the engine queue
+ */
+int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
+					       struct skcipher_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_skcipher_request_to_engine);
+
+/**
+ * crypto_finalize_ablkcipher_request - finalize one ablkcipher_request if
+ * the request is done
  * @engine: the hardware engine
  * @req: the request to be finalized
  * @err: error number
+ * TODO: Remove this function when skcipher conversion is finished
  */
-void crypto_finalize_cipher_request(struct crypto_engine *engine,
-				    struct ablkcipher_request *req, int err)
+void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
+					struct ablkcipher_request *req, int err)
 {
-	unsigned long flags;
-	bool finalize_cur_req = false;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == &req->base)
-		finalize_cur_req = true;
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-
-	if (finalize_cur_req) {
-		if (engine->cur_req_prepared &&
-		    engine->unprepare_cipher_request) {
-			ret = engine->unprepare_cipher_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-		spin_lock_irqsave(&engine->queue_lock, flags);
-		engine->cur_req = NULL;
-		engine->cur_req_prepared = false;
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-	}
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_ablkcipher_request);
 
-	req->base.complete(&req->base, err);
+/**
+ * crypto_finalize_aead_request - finalize one aead_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request to be finalized
+ * @err: error number
+ */
+void crypto_finalize_aead_request(struct crypto_engine *engine,
+				  struct aead_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_aead_request);
 
-	kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_finalize_akcipher_request - finalize one akcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request to be finalized
+ * @err: error number
+ */
+void crypto_finalize_akcipher_request(struct crypto_engine *engine,
+				      struct akcipher_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
 }
-EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_akcipher_request);
 
 /**
- * crypto_finalize_hash_request - finalize one request if the request is done
+ * crypto_finalize_hash_request - finalize one ahash_request if
+ * the request is done
  * @engine: the hardware engine
  * @req: the request to be finalized
  * @err: error number
@@ -300,34 +337,24 @@ EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
 void crypto_finalize_hash_request(struct crypto_engine *engine,
 				  struct ahash_request *req, int err)
 {
-	unsigned long flags;
-	bool finalize_cur_req = false;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == &req->base)
-		finalize_cur_req = true;
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-
-	if (finalize_cur_req) {
-		if (engine->cur_req_prepared &&
-		    engine->unprepare_hash_request) {
-			ret = engine->unprepare_hash_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-		spin_lock_irqsave(&engine->queue_lock, flags);
-		engine->cur_req = NULL;
-		engine->cur_req_prepared = false;
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-	}
-
-	req->base.complete(&req->base, err);
-
-	kthread_queue_work(engine->kworker, &engine->pump_requests);
+	return crypto_finalize_request(engine, &req->base, err);
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
+/**
+ * crypto_finalize_skcipher_request - finalize one skcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request to be finalized
+ * @err: error number
+ */
+void crypto_finalize_skcipher_request(struct crypto_engine *engine,
+				      struct skcipher_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_skcipher_request);
+
 /**
  * crypto_engine_start - start the hardware engine
  * @engine: the hardware engine to be started
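For context, a driver-side sketch of the new generic flow (hypothetical my_* names; it assumes a struct crypto_engine_ctx embedded at the start of the tfm context, which is what the helpers above expect when they call crypto_tfm_ctx()):

	#include <crypto/engine.h>
	#include <crypto/internal/skcipher.h>

	static struct crypto_engine *my_engine;	/* from crypto_engine_alloc_init() at probe time */

	/* per-request callbacks, signatures as assumed from the new crypto_engine_op */
	static int my_prepare_request(struct crypto_engine *engine, void *areq);
	static int my_unprepare_request(struct crypto_engine *engine, void *areq);
	static int my_do_one_request(struct crypto_engine *engine, void *areq);

	struct my_tfm_ctx {
		struct crypto_engine_ctx enginectx;	/* assumed to come first */
		/* driver-private state ... */
	};

	static int my_init_tfm(struct crypto_skcipher *tfm)
	{
		struct my_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);

		ctx->enginectx.op.prepare_request = my_prepare_request;
		ctx->enginectx.op.unprepare_request = my_unprepare_request;
		ctx->enginectx.op.do_one_request = my_do_one_request;
		return 0;
	}

	/* .encrypt/.decrypt simply queue the request on the engine ... */
	static int my_encrypt(struct skcipher_request *req)
	{
		return crypto_transfer_skcipher_request_to_engine(my_engine, req);
	}

	/* ... and the hardware completion path hands it back to the engine core */
	static void my_done(struct skcipher_request *req, int err)
	{
		crypto_finalize_skcipher_request(my_engine, req, err);
	}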

+ 1 - 1
crypto/crypto_user.c

@@ -271,7 +271,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		return -ENOENT;
 
 	err = -ENOMEM;
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		goto drop_alg;
 

+ 17 - 6
crypto/ecc.c

@@ -1025,9 +1025,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 {
 	int ret = 0;
 	struct ecc_point *product, *pk;
-	u64 priv[ndigits];
-	u64 rand_z[ndigits];
-	unsigned int nbytes;
+	u64 *priv, *rand_z;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
 	if (!private_key || !public_key || !curve) {
@@ -1035,14 +1033,22 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto out;
 	}
 
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	priv = kmalloc_array(ndigits, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
-	get_random_bytes(rand_z, nbytes);
+	rand_z = kmalloc_array(ndigits, sizeof(*rand_z), GFP_KERNEL);
+	if (!rand_z) {
+		ret = -ENOMEM;
+		goto kfree_out;
+	}
 
 	pk = ecc_alloc_point(ndigits);
 	if (!pk) {
 		ret = -ENOMEM;
-		goto out;
+		goto kfree_out;
 	}
 
 	product = ecc_alloc_point(ndigits);
@@ -1051,6 +1057,8 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto err_alloc_product;
 	}
 
+	get_random_bytes(rand_z, ndigits << ECC_DIGITS_TO_BYTES_SHIFT);
+
 	ecc_swap_digits(public_key, pk->x, ndigits);
 	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
 	ecc_swap_digits(private_key, priv, ndigits);
@@ -1065,6 +1073,9 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 	ecc_free_point(product);
 err_alloc_product:
 	ecc_free_point(pk);
+kfree_out:
+	kzfree(priv);
+	kzfree(rand_z);
 out:
 	return ret;
 }

+ 17 - 6
crypto/ecdh.c

@@ -89,12 +89,19 @@ static int ecdh_compute_value(struct kpp_request *req)
 		if (!shared_secret)
 			goto free_pubkey;
 
-		copied = sg_copy_to_buffer(req->src, 1, public_key,
-					   public_key_sz);
-		if (copied != public_key_sz) {
-			ret = -EINVAL;
+		/* from here on, any failure means invalid parameters */
+		ret = -EINVAL;
+
+		/* the public key must be exactly two coordinates (a point on the curve) */
+		if (public_key_sz != req->src_len)
+			goto free_all;
+
+		copied = sg_copy_to_buffer(req->src,
+					   sg_nents_for_len(req->src,
+							    public_key_sz),
+					   public_key, public_key_sz);
+		if (copied != public_key_sz)
 			goto free_all;
-		}
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
 						ctx->private_key, public_key,
@@ -111,7 +118,11 @@ static int ecdh_compute_value(struct kpp_request *req)
 	if (ret < 0)
 		goto free_all;
 
-	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
+	/* might want less than we've got */
+	nbytes = min_t(size_t, nbytes, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
 	if (copied != nbytes)
 		ret = -EINVAL;
 

+ 0 - 1
crypto/internal.h

@@ -67,7 +67,6 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg)
 }
 
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
-struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask);
 struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 
 int crypto_init_cipher_ops(struct crypto_tfm *tfm);

+ 40 - 114
crypto/lrw.c

@@ -28,13 +28,31 @@
 
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
-#include <crypto/lrw.h>
 
 #define LRW_BUFFER_SIZE 128u
 
+#define LRW_BLOCK_SIZE 16
+
 struct priv {
 	struct crypto_skcipher *child;
-	struct lrw_table_ctx table;
+
+	/*
+	 * optimizes multiplying a random (non incrementing, as at the
+	 * optimizes multiplying a random (non-incrementing, as at the
+	 * start of a new sector) value with key2; we could also have
+	 * used 4k optimization tables or no optimization at all. In the
+	 * latter case we would have to store key2 here
+	struct gf128mul_64k *table;
+
+	/*
+	 * stores:
+	 *  key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 },
+	 *  key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 }
+	 *  key2*{ 0,0,...1,1,1,1,1 }, etc
+	 * needed for optimized multiplication of incrementing values
+	 * with key2
+	 */
+	be128 mulinc[128];
 };
 
 struct rctx {
@@ -65,11 +83,25 @@ static inline void setbit128_bbe(void *b, int bit)
 			), b);
 }
 
-int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak)
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
+		  unsigned int keylen)
 {
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
+	int err, bsize = LRW_BLOCK_SIZE;
+	const u8 *tweak = key + keylen - bsize;
 	be128 tmp = { 0 };
 	int i;
 
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
+
 	if (ctx->table)
 		gf128mul_free_64k(ctx->table);
 
@@ -87,34 +119,6 @@ int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(lrw_init_table);
-
-void lrw_free_table(struct lrw_table_ctx *ctx)
-{
-	if (ctx->table)
-		gf128mul_free_64k(ctx->table);
-}
-EXPORT_SYMBOL_GPL(lrw_free_table);
-
-static int setkey(struct crypto_skcipher *parent, const u8 *key,
-		  unsigned int keylen)
-{
-	struct priv *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
-	int err, bsize = LRW_BLOCK_SIZE;
-	const u8 *tweak = key + keylen - bsize;
-
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
-					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen - bsize);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->table, tweak);
-}
 
 static inline void inc(be128 *iv)
 {
@@ -238,7 +242,7 @@ static int pre_crypt(struct skcipher_request *req)
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
 			be128_xor(&rctx->t, &rctx->t,
-				  &ctx->table.mulinc[get_index128(iv)]);
+				  &ctx->mulinc[get_index128(iv)]);
 			inc(iv);
 		} while ((avail -= bs) >= bs);
 
@@ -301,7 +305,7 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
 	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
 
 	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&rctx->t, ctx->table.table);
+	gf128mul_64k_bbe(&rctx->t, ctx->table);
 
 	return 0;
 }
@@ -313,7 +317,7 @@ static void exit_crypt(struct skcipher_request *req)
 	rctx->left = 0;
 
 	if (rctx->ext)
-		kfree(rctx->ext);
+		kzfree(rctx->ext);
 }
 
 static int do_encrypt(struct skcipher_request *req, int err)
@@ -416,85 +420,6 @@ static int decrypt(struct skcipher_request *req)
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
-int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
-	      struct scatterlist *ssrc, unsigned int nbytes,
-	      struct lrw_crypt_req *req)
-{
-	const unsigned int bsize = LRW_BLOCK_SIZE;
-	const unsigned int max_blks = req->tbuflen / bsize;
-	struct lrw_table_ctx *ctx = req->table_ctx;
-	struct blkcipher_walk walk;
-	unsigned int nblocks;
-	be128 *iv, *src, *dst, *t;
-	be128 *t_buf = req->tbuf;
-	int err, i;
-
-	BUG_ON(max_blks < 1);
-
-	blkcipher_walk_init(&walk, sdst, ssrc, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	nblocks = min(walk.nbytes / bsize, max_blks);
-	src = (be128 *)walk.src.virt.addr;
-	dst = (be128 *)walk.dst.virt.addr;
-
-	/* calculate first value of T */
-	iv = (be128 *)walk.iv;
-	t_buf[0] = *iv;
-
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&t_buf[0], ctx->table);
-
-	i = 0;
-	goto first;
-
-	for (;;) {
-		do {
-			for (i = 0; i < nblocks; i++) {
-				/* T <- I*Key2, using the optimization
-				 * discussed in the specification */
-				be128_xor(&t_buf[i], t,
-						&ctx->mulinc[get_index128(iv)]);
-				inc(iv);
-first:
-				t = &t_buf[i];
-
-				/* PP <- T xor P */
-				be128_xor(dst + i, t, src + i);
-			}
-
-			/* CC <- E(Key2,PP) */
-			req->crypt_fn(req->crypt_ctx, (u8 *)dst,
-				      nblocks * bsize);
-
-			/* C <- T xor CC */
-			for (i = 0; i < nblocks; i++)
-				be128_xor(dst + i, dst + i, &t_buf[i]);
-
-			src += nblocks;
-			dst += nblocks;
-			nbytes -= nblocks * bsize;
-			nblocks = min(nbytes / bsize, max_blks);
-		} while (nblocks > 0);
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-		if (!nbytes)
-			break;
-
-		nblocks = min(nbytes / bsize, max_blks);
-		src = (be128 *)walk.src.virt.addr;
-		dst = (be128 *)walk.dst.virt.addr;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(lrw_crypt);
-
 static int init_tfm(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
@@ -518,7 +443,8 @@ static void exit_tfm(struct crypto_skcipher *tfm)
 {
 	struct priv *ctx = crypto_skcipher_ctx(tfm);
 
-	lrw_free_table(&ctx->table);
+	if (ctx->table)
+		gf128mul_free_64k(ctx->table);
 	crypto_free_skcipher(ctx->child);
 }
 

+ 4 - 30
crypto/mcryptd.c

@@ -367,7 +367,7 @@ static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
 		goto out;
 
 	rctx->out = req->result;
-	err = ahash_mcryptd_update(&rctx->areq);
+	err = crypto_ahash_update(&rctx->areq);
 	if (err) {
 		req->base.complete = rctx->complete;
 		goto out;
@@ -394,7 +394,7 @@ static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
 		goto out;
 
 	rctx->out = req->result;
-	err = ahash_mcryptd_final(&rctx->areq);
+	err = crypto_ahash_final(&rctx->areq);
 	if (err) {
 		req->base.complete = rctx->complete;
 		goto out;
@@ -420,7 +420,7 @@ static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 	rctx->out = req->result;
-	err = ahash_mcryptd_finup(&rctx->areq);
+	err = crypto_ahash_finup(&rctx->areq);
 
 	if (err) {
 		req->base.complete = rctx->complete;
@@ -455,7 +455,7 @@ static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
 						rctx->complete, req_async);
 
 	rctx->out = req->result;
-	err = ahash_mcryptd_digest(desc);
+	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
 
 out:
 	local_bh_disable();
@@ -612,32 +612,6 @@ struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
 }
 EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 
-int ahash_mcryptd_digest(struct ahash_request *desc)
-{
-	return crypto_ahash_init(desc) ?: ahash_mcryptd_finup(desc);
-}
-
-int ahash_mcryptd_update(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_update(desc);
-}
-
-int ahash_mcryptd_finup(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_finup(desc);
-}
-
-int ahash_mcryptd_final(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_final(desc);
-}
-
 struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
 {
 	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);

+ 0 - 17
crypto/md4.c

@@ -64,23 +64,6 @@ static inline u32 H(u32 x, u32 y, u32 z)
 #define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
 #define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
 
-/* XXX: this stuff can be optimized */
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
 static void md4_transform(u32 *hash, u32 const *in)
 {
 	u32 a, b, c, d;

+ 0 - 17
crypto/md5.c

@@ -32,23 +32,6 @@ const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = {
 };
 EXPORT_SYMBOL_GPL(md5_zero_message_hash);
 
-/* XXX: this stuff can be optimized */
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
 #define F1(x, y, z)	(z ^ (x & (y ^ z)))
 #define F2(x, y, z)	F1(z, x, y)
 #define F3(x, y, z)	(x ^ y ^ z)

+ 1 - 1
crypto/rsa-pkcs1pad.c

@@ -192,7 +192,7 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err)
 	if (likely(!pad_len))
 		goto out;
 
-	out_buf = kzalloc(ctx->key_size, GFP_ATOMIC);
+	out_buf = kzalloc(ctx->key_size, GFP_KERNEL);
 	err = -ENOMEM;
 	if (!out_buf)
 		goto out;

+ 50 - 0
crypto/simd.c

@@ -221,4 +221,54 @@ void simd_skcipher_free(struct simd_skcipher_alg *salg)
 }
 EXPORT_SYMBOL_GPL(simd_skcipher_free);
 
+int simd_register_skciphers_compat(struct skcipher_alg *algs, int count,
+				   struct simd_skcipher_alg **simd_algs)
+{
+	int err;
+	int i;
+	const char *algname;
+	const char *drvname;
+	const char *basename;
+	struct simd_skcipher_alg *simd;
+
+	err = crypto_register_skciphers(algs, count);
+	if (err)
+		return err;
+
+	for (i = 0; i < count; i++) {
+		WARN_ON(strncmp(algs[i].base.cra_name, "__", 2));
+		WARN_ON(strncmp(algs[i].base.cra_driver_name, "__", 2));
+		algname = algs[i].base.cra_name + 2;
+		drvname = algs[i].base.cra_driver_name + 2;
+		basename = algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto err_unregister;
+		simd_algs[i] = simd;
+	}
+	return 0;
+
+err_unregister:
+	simd_unregister_skciphers(algs, count, simd_algs);
+	return err;
+}
+EXPORT_SYMBOL_GPL(simd_register_skciphers_compat);
+
+void simd_unregister_skciphers(struct skcipher_alg *algs, int count,
+			       struct simd_skcipher_alg **simd_algs)
+{
+	int i;
+
+	crypto_unregister_skciphers(algs, count);
+
+	for (i = 0; i < count; i++) {
+		if (simd_algs[i]) {
+			simd_skcipher_free(simd_algs[i]);
+			simd_algs[i] = NULL;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(simd_unregister_skciphers);
+
 MODULE_LICENSE("GPL");

+ 244 - 0
crypto/sm4_generic.c

@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * SM4 Cipher Algorithm.
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * All rights reserved.
+ */
+
+#include <crypto/sm4.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+static const u32 fk[4] = {
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u8 sbox[256] = {
+	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+static const u32 ck[] = {
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static u32 sm4_t_non_lin_sub(u32 x)
+{
+	int i;
+	u8 *b = (u8 *)&x;
+
+	for (i = 0; i < 4; ++i)
+		b[i] = sbox[b[i]];
+
+	return x;
+}
+
+static u32 sm4_key_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 13) ^ rol32(x, 23);
+
+}
+
+static u32 sm4_enc_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
+}
+
+static u32 sm4_key_sub(u32 x)
+{
+	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static u32 sm4_enc_sub(u32 x)
+{
+	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static u32 sm4_round(const u32 *x, const u32 rk)
+{
+	return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
+}
+
+
+/**
+ * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+			  unsigned int key_len)
+{
+	u32 rk[4], t;
+	const u32 *key = (u32 *)in_key;
+	int i;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < 4; ++i)
+		rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
+
+	for (i = 0; i < 32; ++i) {
+		t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
+		ctx->rkey_enc[i] = t;
+		rk[0] = rk[1];
+		rk[1] = rk[2];
+		rk[2] = rk[3];
+		rk[3] = t;
+	}
+
+	for (i = 0; i < 32; ++i)
+		ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
+
+/**
+ * crypto_sm4_set_key - Set the SM4 key.
+ * @tfm:	The %crypto_tfm that is used in the context.
+ * @in_key:	The input key.
+ * @key_len:	The size of the key.
+ *
+ * Returns 0 on success; on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
+ * is set. The function uses crypto_sm4_expand_key() to expand the key.
+ * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
+ * retrieved with crypto_tfm_ctx().
+ */
+int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int ret;
+
+	ret = crypto_sm4_expand_key(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
+
+static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
+{
+	u32 x[4], i, t;
+
+	for (i = 0; i < 4; ++i)
+		x[i] = get_unaligned_be32(&in[i]);
+
+	for (i = 0; i < 32; ++i) {
+		t = sm4_round(x, rk[i]);
+		x[0] = x[1];
+		x[1] = x[2];
+		x[2] = x[3];
+		x[3] = t;
+	}
+
+	for (i = 0; i < 4; ++i)
+		put_unaligned_be32(x[3 - i], &out[i]);
+}
+
+/* encrypt a block of text */
+
+static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
+}
+
+/* decrypt a block of text */
+
+static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
+}
+
+static struct crypto_alg sm4_alg = {
+	.cra_name		=	"sm4",
+	.cra_driver_name	=	"sm4-generic",
+	.cra_priority		=	100,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	SM4_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypto_sm4_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	SM4_KEY_SIZE,
+			.cia_max_keysize	=	SM4_KEY_SIZE,
+			.cia_setkey		=	crypto_sm4_set_key,
+			.cia_encrypt		=	sm4_encrypt,
+			.cia_decrypt		=	sm4_decrypt
+		}
+	}
+};
+
+static int __init sm4_init(void)
+{
+	return crypto_register_alg(&sm4_alg);
+}
+
+static void __exit sm4_fini(void)
+{
+	crypto_unregister_alg(&sm4_alg);
+}
+
+module_init(sm4_init);
+module_exit(sm4_fini);
+
+MODULE_DESCRIPTION("SM4 Cipher Algorithm");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-generic");

+ 307 - 0
crypto/speck.c

@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Speck: a lightweight block cipher
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Speck has 10 variants, including 5 block sizes.  For now we only implement
+ * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
+ * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
+ * and a key size of K bits.  The Speck128 variants are believed to be the most
+ * secure variants, and they use the same block size and key sizes as AES.  The
+ * Speck64 variants are less secure, but on 32-bit processors are usually
+ * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
+ * secure and/or not as well suited for implementation on either 32-bit or
+ * 64-bit processors, so are omitted.
+ *
+ * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
+ * https://eprint.iacr.org/2013/404.pdf
+ *
+ * In a correspondence, the Speck designers have also clarified that the words
+ * should be interpreted in little-endian format, and the words should be
+ * ordered such that the first word of each block is 'y' rather than 'x', and
+ * the first key word (rather than the last) becomes the first round key.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/speck.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+/* Speck128 */
+
+static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
+{
+	*x = ror64(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol64(*y, 3);
+	*y ^= *x;
+}
+
+static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
+{
+	*y ^= *x;
+	*y = ror64(*y, 3);
+	*x ^= k;
+	*x -= *y;
+	*x = rol64(*x, 8);
+}
+
+void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in)
+{
+	u64 y = get_unaligned_le64(in);
+	u64 x = get_unaligned_le64(in + 8);
+	int i;
+
+	for (i = 0; i < ctx->nrounds; i++)
+		speck128_round(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le64(y, out);
+	put_unaligned_le64(x, out + 8);
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
+
+static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in)
+{
+	u64 y = get_unaligned_le64(in);
+	u64 x = get_unaligned_le64(in + 8);
+	int i;
+
+	for (i = ctx->nrounds - 1; i >= 0; i--)
+		speck128_unround(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le64(y, out);
+	put_unaligned_le64(x, out + 8);
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
+
+static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
+			   unsigned int keylen)
+{
+	u64 l[3];
+	u64 k;
+	int i;
+
+	switch (keylen) {
+	case SPECK128_128_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		ctx->nrounds = SPECK128_128_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[0], &k, i);
+		}
+		break;
+	case SPECK128_192_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		l[1] = get_unaligned_le64(key + 16);
+		ctx->nrounds = SPECK128_192_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[i % 2], &k, i);
+		}
+		break;
+	case SPECK128_256_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		l[1] = get_unaligned_le64(key + 16);
+		l[2] = get_unaligned_le64(key + 24);
+		ctx->nrounds = SPECK128_256_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[i % 3], &k, i);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
+
+static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
+
+/* Speck64 */
+
+static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
+{
+	*x = ror32(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol32(*y, 3);
+	*y ^= *x;
+}
+
+static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
+{
+	*y ^= *x;
+	*y = ror32(*y, 3);
+	*x ^= k;
+	*x -= *y;
+	*x = rol32(*x, 8);
+}
+
+void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in)
+{
+	u32 y = get_unaligned_le32(in);
+	u32 x = get_unaligned_le32(in + 4);
+	int i;
+
+	for (i = 0; i < ctx->nrounds; i++)
+		speck64_round(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le32(y, out);
+	put_unaligned_le32(x, out + 4);
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
+
+static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in)
+{
+	u32 y = get_unaligned_le32(in);
+	u32 x = get_unaligned_le32(in + 4);
+	int i;
+
+	for (i = ctx->nrounds - 1; i >= 0; i--)
+		speck64_unround(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le32(y, out);
+	put_unaligned_le32(x, out + 4);
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
+
+static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
+			  unsigned int keylen)
+{
+	u32 l[3];
+	u32 k;
+	int i;
+
+	switch (keylen) {
+	case SPECK64_96_KEY_SIZE:
+		k = get_unaligned_le32(key);
+		l[0] = get_unaligned_le32(key + 4);
+		l[1] = get_unaligned_le32(key + 8);
+		ctx->nrounds = SPECK64_96_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck64_round(&l[i % 2], &k, i);
+		}
+		break;
+	case SPECK64_128_KEY_SIZE:
+		k = get_unaligned_le32(key);
+		l[0] = get_unaligned_le32(key + 4);
+		l[1] = get_unaligned_le32(key + 8);
+		l[2] = get_unaligned_le32(key + 12);
+		ctx->nrounds = SPECK64_128_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck64_round(&l[i % 3], &k, i);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
+
+static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
+			  unsigned int keylen)
+{
+	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
+
+/* Algorithm definitions */
+
+static struct crypto_alg speck_algs[] = {
+	{
+		.cra_name		= "speck128",
+		.cra_driver_name	= "speck128-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= SPECK128_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.cipher = {
+				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
+				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
+				.cia_setkey		= speck128_setkey,
+				.cia_encrypt		= speck128_encrypt,
+				.cia_decrypt		= speck128_decrypt
+			}
+		}
+	}, {
+		.cra_name		= "speck64",
+		.cra_driver_name	= "speck64-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= SPECK64_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.cipher = {
+				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
+				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
+				.cia_setkey		= speck64_setkey,
+				.cia_encrypt		= speck64_encrypt,
+				.cia_decrypt		= speck64_decrypt
+			}
+		}
+	}
+};
+
+static int __init speck_module_init(void)
+{
+	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_module_exit(void)
+{
+	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_module_init);
+module_exit(speck_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (generic)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("speck128");
+MODULE_ALIAS_CRYPTO("speck128-generic");
+MODULE_ALIAS_CRYPTO("speck64");
+MODULE_ALIAS_CRYPTO("speck64-generic");

+ 3 - 0
crypto/tcrypt.c

@@ -1983,6 +1983,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 	case 190:
 		ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))");
 		break;
+	case 191:
+		ret += tcrypt_test("ecb(sm4)");
+		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);

+ 45 - 0
crypto/testmgr.c

@@ -3000,6 +3000,33 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.dec = __VECS(serpent_dec_tv_template)
 			}
 		}
+	}, {
+		.alg = "ecb(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(sm4_enc_tv_template),
+				.dec = __VECS(sm4_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "ecb(speck128)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck128_enc_tv_template),
+				.dec = __VECS(speck128_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "ecb(speck64)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck64_enc_tv_template),
+				.dec = __VECS(speck64_dec_tv_template)
+			}
+		}
 	}, {
 		.alg = "ecb(tea)",
 		.test = alg_test_skcipher,
@@ -3557,6 +3584,24 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.dec = __VECS(serpent_xts_dec_tv_template)
 			}
 		}
+	}, {
+		.alg = "xts(speck128)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck128_xts_enc_tv_template),
+				.dec = __VECS(speck128_xts_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "xts(speck64)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck64_xts_enc_tv_template),
+				.dec = __VECS(speck64_xts_dec_tv_template)
+			}
+		}
 	}, {
 		.alg = "xts(twofish)",
 		.test = alg_test_skcipher,

+ 3321 - 1445
crypto/testmgr.h

@@ -548,7 +548,7 @@ static const struct akcipher_testvec rsa_tv_template[] = {
 static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 	{
 	.key =
-	"\x30\x82\x03\x1f\x02\x01\x10\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
+	"\x30\x82\x03\x1f\x02\x01\x00\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
 	"\x8c\x92\x31\xe7\x69\x02\xa2\xd5\x5c\x78\xde\xa2\x0c\x8f\xfe\x28"
 	"\x59\x31\xdf\x40\x9c\x60\x61\x06\xb9\x2f\x62\x40\x80\x76\xcb\x67"
 	"\x4a\xb5\x59\x56\x69\x17\x07\xfa\xf9\x4c\xbd\x6c\x37\x7a\x46\x7d"
@@ -597,8 +597,8 @@ static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 	"\xfe\xf8\x27\x1b\xd6\x55\x60\x5e\x48\xb7\x6d\x9a\xa8\x37\xf9\x7a"
 	"\xde\x1b\xcd\x5d\x1a\x30\xd4\xe9\x9e\x5b\x3c\x15\xf8\x9c\x1f\xda"
 	"\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46"
-	"\xb8\x35\xdf\x41\x02\x01\x30\x02\x01\x30\x02\x01\x30\x02\x01\x30"
-	"\x02\x01\x30",
+	"\xb8\x35\xdf\x41\x02\x01\x00\x02\x01\x00\x02\x01\x00\x02\x01\x00"
+	"\x02\x01\x00",
 	.key_len = 804,
 	/*
 	 * m is SHA256 hash of following message:
@@ -2044,263 +2044,522 @@ static const struct hash_testvec crct10dif_tv_template[] = {
 		.digest		= (u8 *)(u16 []){ 0x44c6 },
 		.np		= 4,
 		.tap		= { 1, 255, 57, 6 },
-	}
-};
-
-/* Example vectors below taken from
- * http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
- *
- * The rest taken from
- * https://github.com/adamws/oscca-sm3
- */
-static const struct hash_testvec sm3_tv_template[] = {
-	{
-		.plaintext = "",
-		.psize = 0,
-		.digest = (u8 *)(u8 []) {
-			0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F,
-			0x8e, 0x61, 0x19, 0x48, 0x31, 0xE8, 0x1A, 0x8F,
-			0x22, 0xBE, 0xC8, 0xC7, 0x28, 0xFE, 0xFB, 0x74,
-			0x7E, 0xD0, 0x35, 0xEB, 0x50, 0x82, 0xAA, 0x2B }
-	}, {
-		.plaintext = "a",
-		.psize = 1,
-		.digest = (u8 *)(u8 []) {
-			0x62, 0x34, 0x76, 0xAC, 0x18, 0xF6, 0x5A, 0x29,
-			0x09, 0xE4, 0x3C, 0x7F, 0xEC, 0x61, 0xB4, 0x9C,
-			0x7E, 0x76, 0x4A, 0x91, 0xA1, 0x8C, 0xCB, 0x82,
-			0xF1, 0x91, 0x7A, 0x29, 0xC8, 0x6C, 0x5E, 0x88 }
-	}, {
-		/* A.1. Example 1 */
-		.plaintext = "abc",
-		.psize = 3,
-		.digest = (u8 *)(u8 []) {
-			0x66, 0xC7, 0xF0, 0xF4, 0x62, 0xEE, 0xED, 0xD9,
-			0xD1, 0xF2, 0xD4, 0x6B, 0xDC, 0x10, 0xE4, 0xE2,
-			0x41, 0x67, 0xC4, 0x87, 0x5C, 0xF2, 0xF7, 0xA2,
-			0x29, 0x7D, 0xA0, 0x2B, 0x8F, 0x4B, 0xA8, 0xE0 }
-	}, {
-		.plaintext = "abcdefghijklmnopqrstuvwxyz",
-		.psize = 26,
-		.digest = (u8 *)(u8 []) {
-			0xB8, 0x0F, 0xE9, 0x7A, 0x4D, 0xA2, 0x4A, 0xFC,
-			0x27, 0x75, 0x64, 0xF6, 0x6A, 0x35, 0x9E, 0xF4,
-			0x40, 0x46, 0x2A, 0xD2, 0x8D, 0xCC, 0x6D, 0x63,
-			0xAD, 0xB2, 0x4D, 0x5C, 0x20, 0xA6, 0x15, 0x95 }
-	}, {
-		/* A.1. Example 2 */
-		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdab"
-			     "cdabcdabcdabcdabcd",
-		.psize = 64,
-		.digest = (u8 *)(u8 []) {
-			0xDE, 0xBE, 0x9F, 0xF9, 0x22, 0x75, 0xB8, 0xA1,
-			0x38, 0x60, 0x48, 0x89, 0xC1, 0x8E, 0x5A, 0x4D,
-			0x6F, 0xDB, 0x70, 0xE5, 0x38, 0x7E, 0x57, 0x65,
-			0x29, 0x3D, 0xCB, 0xA3, 0x9C, 0x0C, 0x57, 0x32 }
-	}, {
-		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcd",
-		.psize = 256,
-		.digest = (u8 *)(u8 []) {
-			0xB9, 0x65, 0x76, 0x4C, 0x8B, 0xEB, 0xB0, 0x91,
-			0xC7, 0x60, 0x2B, 0x74, 0xAF, 0xD3, 0x4E, 0xEF,
-			0xB5, 0x31, 0xDC, 0xCB, 0x4E, 0x00, 0x76, 0xD9,
-			0xB7, 0xCD, 0x81, 0x31, 0x99, 0xB4, 0x59, 0x71 }
-	}
-};
-
-/*
- * SHA1 test vectors  from from FIPS PUB 180-1
- * Long vector from CAVS 5.0
- */
-static const struct hash_testvec sha1_tv_template[] = {
-	{
-		.plaintext = "",
-		.psize	= 0,
-		.digest	= "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55"
-			  "\xbf\xef\x95\x60\x18\x90\xaf\xd8\x07\x09",
-	}, {
-		.plaintext = "abc",
-		.psize	= 3,
-		.digest	= "\xa9\x99\x3e\x36\x47\x06\x81\x6a\xba\x3e"
-			  "\x25\x71\x78\x50\xc2\x6c\x9c\xd0\xd8\x9d",
-	}, {
-		.plaintext = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
-		.psize	= 56,
-		.digest	= "\x84\x98\x3e\x44\x1c\x3b\xd2\x6e\xba\xae"
-			  "\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1",
-		.np	= 2,
-		.tap	= { 28, 28 }
-	}, {
-		.plaintext = "\xec\x29\x56\x12\x44\xed\xe7\x06"
-			     "\xb6\xeb\x30\xa1\xc3\x71\xd7\x44"
-			     "\x50\xa1\x05\xc3\xf9\x73\x5f\x7f"
-			     "\xa9\xfe\x38\xcf\x67\xf3\x04\xa5"
-			     "\x73\x6a\x10\x6e\x92\xe1\x71\x39"
-			     "\xa6\x81\x3b\x1c\x81\xa4\xf3\xd3"
-			     "\xfb\x95\x46\xab\x42\x96\xfa\x9f"
-			     "\x72\x28\x26\xc0\x66\x86\x9e\xda"
-			     "\xcd\x73\xb2\x54\x80\x35\x18\x58"
-			     "\x13\xe2\x26\x34\xa9\xda\x44\x00"
-			     "\x0d\x95\xa2\x81\xff\x9f\x26\x4e"
-			     "\xcc\xe0\xa9\x31\x22\x21\x62\xd0"
-			     "\x21\xcc\xa2\x8d\xb5\xf3\xc2\xaa"
-			     "\x24\x94\x5a\xb1\xe3\x1c\xb4\x13"
-			     "\xae\x29\x81\x0f\xd7\x94\xca\xd5"
-			     "\xdf\xaf\x29\xec\x43\xcb\x38\xd1"
-			     "\x98\xfe\x4a\xe1\xda\x23\x59\x78"
-			     "\x02\x21\x40\x5b\xd6\x71\x2a\x53"
-			     "\x05\xda\x4b\x1b\x73\x7f\xce\x7c"
-			     "\xd2\x1c\x0e\xb7\x72\x8d\x08\x23"
-			     "\x5a\x90\x11",
-		.psize	= 163,
-		.digest	= "\x97\x01\x11\xc4\xe7\x7b\xcc\x88\xcc\x20"
-			  "\x45\x9c\x02\xb6\x9b\x4a\xa8\xf5\x82\x17",
-		.np	= 4,
-		.tap	= { 63, 64, 31, 5 }
-	}, {
-		.plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-",
-		.psize	= 64,
-		.digest = "\xc8\x71\xf6\x9a\x63\xcc\xa9\x84\x84\x82"
-			  "\x64\xe7\x79\x95\x5d\xd7\x19\x41\x7c\x91",
 	}, {
-		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
-			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
-			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
-			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
-			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
-			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
-			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
-			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
-			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
-			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
-			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
-			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
-			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
-			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
-			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
-			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
-			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
-			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
-			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
-			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
-			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
-			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
-			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
-			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
-			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
-			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
-			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
-			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
-			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
-			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
-			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
-			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
-			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
-			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
-			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
-			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
-			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
-			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
-			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
-			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
-			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
-			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
-			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
-			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
-			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
-			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
-			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
-			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
-			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
-			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
-			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
-			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
-			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
-			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
-			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
-			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
-			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
-			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
-			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
-			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
-			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
-			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
-			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
-			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
-			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
-			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
-			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
-			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
-			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
-			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
-			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
-			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
-			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
-			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
-			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
-			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
-			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
-			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
-			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
-			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
-			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
-			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
-			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
-			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
-			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
-			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
-			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
-			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
-			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
-			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
-			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
-			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
-			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
-			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
-			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
-			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
-			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
-			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
-			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
-			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
-			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
-			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
-			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
-			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
-			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
-			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
-			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
-			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
-			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
-			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
-			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
-			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
-			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
-			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
-			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
-			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
-			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
-			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
-			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
-			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
-			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
-			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
-			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
-			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
-			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
-			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
-			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
-			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
-		.psize     = 1023,
-		.digest    = "\xb8\xe3\x54\xed\xc5\xfc\xef\xa4"
-			     "\x55\x73\x4a\x81\x99\xe4\x47\x2a"
-			     "\x30\xd6\xc9\x85",
+		.plaintext =	"\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
+				"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
+				"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
+				"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
+				"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
+				"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
+				"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
+				"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
+				"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
+				"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
+				"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
+				"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
+				"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
+				"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
+				"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
+				"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
+				"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
+				"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
+				"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
+				"\x58\xef\x63\xfa\x91\x05\x9c\x33"
+				"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
+				"\x19\xb0\x47\xde\x52\xe9\x80\x17"
+				"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
+				"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
+				"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
+				"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
+				"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
+				"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
+				"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
+				"\x86\x1d\x91\x28\xbf\x33\xca\x61"
+				"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
+				"\x47\xde\x75\x0c\x80\x17\xae\x22"
+				"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
+				"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
+				"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
+				"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
+				"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
+				"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
+				"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
+				"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
+				"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
+				"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
+				"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
+				"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
+				"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
+				"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
+				"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
+				"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
+				"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
+				"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
+				"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
+				"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
+				"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
+				"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
+				"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
+				"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
+				"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
+				"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
+				"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
+				"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
+				"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
+				"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
+				"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
+				"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
+				"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
+				"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
+				"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
+				"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
+				"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
+				"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
+				"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
+				"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
+				"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
+				"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
+				"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
+				"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
+				"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
+				"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
+				"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
+				"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
+				"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
+				"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
+				"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
+				"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
+				"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
+				"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
+				"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
+				"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
+				"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
+				"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
+				"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
+				"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
+				"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
+				"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
+				"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
+				"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
+				"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
+				"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
+				"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
+				"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
+				"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
+				"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
+				"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
+				"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
+				"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
+				"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
+				"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
+				"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
+				"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
+				"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
+				"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
+				"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
+				"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
+				"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
+				"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
+				"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
+				"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
+				"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
+				"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
+				"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
+				"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
+				"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
+				"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
+				"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
+				"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
+				"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
+				"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
+				"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
+				"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
+				"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
+				"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
+				"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
+				"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
+				"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
+				"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
+				"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
+				"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
+				"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
+				"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
+				"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
+				"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
+				"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
+				"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
+				"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
+				"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
+				"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
+				"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
+				"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
+				"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
+				"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
+				"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
+				"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
+				"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
+				"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
+				"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
+				"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
+				"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
+				"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
+				"\x47\xde\x52\xe9\x80\x17\x8b\x22"
+				"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
+				"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
+				"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
+				"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
+				"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
+				"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
+				"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
+				"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
+				"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
+				"\x75\x0c\x80\x17\xae\x22\xb9\x50"
+				"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
+				"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
+				"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
+				"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
+				"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
+				"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
+				"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
+				"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
+				"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
+				"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
+				"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
+				"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
+				"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
+				"\x48\xdf\x53\xea\x81\x18\x8c\x23"
+				"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
+				"\x09\xa0\x37\xce\x42\xd9\x70\x07"
+				"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
+				"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
+				"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
+				"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
+				"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
+				"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
+				"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
+				"\x76\x0d\x81\x18\xaf\x23\xba\x51"
+				"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
+				"\x37\xce\x65\xfc\x70\x07\x9e\x12"
+				"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
+				"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
+				"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
+				"\xff\x73\x0a\xa1\x15\xac\x43\xda"
+				"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
+				"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
+				"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
+				"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
+				"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
+				"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
+				"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
+				"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
+				"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
+				"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
+				"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
+				"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
+				"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
+				"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
+				"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
+				"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
+				"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
+				"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
+				"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
+				"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
+				"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
+				"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
+				"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
+				"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
+				"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
+				"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
+				"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
+				"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
+				"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
+				"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
+				"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
+				"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
+				"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
+				"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
+				"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
+				"\xef\x86\x1d\x91\x28\xbf\x33\xca"
+				"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
+				"\xd3\x47\xde\x75\x0c\x80\x17\xae"
+				"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
+				"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
+				"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
+				"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
+				"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
+				"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
+				"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
+				"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
+				"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
+				"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
+				"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
+				"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
+				"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
+				"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
+				"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
+				"\x67\xfe\x95\x09\xa0\x37\xce\x42"
+				"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
+				"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
+				"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
+		.psize = 2048,
+		.digest		= (u8 *)(u16 []){ 0x23ca },
+	}
+};
+
+/* Example vectors below are taken from
+ * http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
+ *
+ * The rest are taken from
+ * https://github.com/adamws/oscca-sm3
+ */
+static const struct hash_testvec sm3_tv_template[] = {
+	{
+		.plaintext = "",
+		.psize = 0,
+		.digest = (u8 *)(u8 []) {
+			0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F,
+			0x8e, 0x61, 0x19, 0x48, 0x31, 0xE8, 0x1A, 0x8F,
+			0x22, 0xBE, 0xC8, 0xC7, 0x28, 0xFE, 0xFB, 0x74,
+			0x7E, 0xD0, 0x35, 0xEB, 0x50, 0x82, 0xAA, 0x2B }
+	}, {
+		.plaintext = "a",
+		.psize = 1,
+		.digest = (u8 *)(u8 []) {
+			0x62, 0x34, 0x76, 0xAC, 0x18, 0xF6, 0x5A, 0x29,
+			0x09, 0xE4, 0x3C, 0x7F, 0xEC, 0x61, 0xB4, 0x9C,
+			0x7E, 0x76, 0x4A, 0x91, 0xA1, 0x8C, 0xCB, 0x82,
+			0xF1, 0x91, 0x7A, 0x29, 0xC8, 0x6C, 0x5E, 0x88 }
+	}, {
+		/* A.1. Example 1 */
+		.plaintext = "abc",
+		.psize = 3,
+		.digest = (u8 *)(u8 []) {
+			0x66, 0xC7, 0xF0, 0xF4, 0x62, 0xEE, 0xED, 0xD9,
+			0xD1, 0xF2, 0xD4, 0x6B, 0xDC, 0x10, 0xE4, 0xE2,
+			0x41, 0x67, 0xC4, 0x87, 0x5C, 0xF2, 0xF7, 0xA2,
+			0x29, 0x7D, 0xA0, 0x2B, 0x8F, 0x4B, 0xA8, 0xE0 }
+	}, {
+		.plaintext = "abcdefghijklmnopqrstuvwxyz",
+		.psize = 26,
+		.digest = (u8 *)(u8 []) {
+			0xB8, 0x0F, 0xE9, 0x7A, 0x4D, 0xA2, 0x4A, 0xFC,
+			0x27, 0x75, 0x64, 0xF6, 0x6A, 0x35, 0x9E, 0xF4,
+			0x40, 0x46, 0x2A, 0xD2, 0x8D, 0xCC, 0x6D, 0x63,
+			0xAD, 0xB2, 0x4D, 0x5C, 0x20, 0xA6, 0x15, 0x95 }
+	}, {
+		/* A.1. Example 2 */
+		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdab"
+			     "cdabcdabcdabcdabcd",
+		.psize = 64,
+		.digest = (u8 *)(u8 []) {
+			0xDE, 0xBE, 0x9F, 0xF9, 0x22, 0x75, 0xB8, 0xA1,
+			0x38, 0x60, 0x48, 0x89, 0xC1, 0x8E, 0x5A, 0x4D,
+			0x6F, 0xDB, 0x70, 0xE5, 0x38, 0x7E, 0x57, 0x65,
+			0x29, 0x3D, 0xCB, 0xA3, 0x9C, 0x0C, 0x57, 0x32 }
+	}, {
+		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcd",
+		.psize = 256,
+		.digest = (u8 *)(u8 []) {
+			0xB9, 0x65, 0x76, 0x4C, 0x8B, 0xEB, 0xB0, 0x91,
+			0xC7, 0x60, 0x2B, 0x74, 0xAF, 0xD3, 0x4E, 0xEF,
+			0xB5, 0x31, 0xDC, 0xCB, 0x4E, 0x00, 0x76, 0xD9,
+			0xB7, 0xCD, 0x81, 0x31, 0x99, 0xB4, 0x59, 0x71 }
+	}
+};
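
The sm3_tv_template entries above pair a message with its expected 256-bit digest. As a rough, self-contained sketch (not part of the patch; it assumes the generic SM3 implementation is registered under the name "sm3", and error handling is abbreviated), one such vector could be checked through the synchronous hash API:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>

/* Hypothetical helper: hash @msg with SM3 and compare against @expected. */
static int sm3_check_vector(const u8 *msg, unsigned int len,
			    const u8 *expected)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	u8 out[32];			/* SM3 digest is 256 bits */
	int err;

	tfm = crypto_alloc_shash("sm3", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	err = crypto_shash_digest(desc, msg, len, out);
	if (!err && memcmp(out, expected, sizeof(out)))
		err = -EINVAL;		/* digest mismatch */

	kfree(desc);
	crypto_free_shash(tfm);
	return err;
}

For instance, calling this with the "abc" plaintext and the A.1. Example 1 digest above would be expected to return 0 once an SM3 driver is loaded.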
+
+/*
+ * SHA1 test vectors from FIPS PUB 180-1
+ * Long vector from CAVS 5.0
+ */
+static const struct hash_testvec sha1_tv_template[] = {
+	{
+		.plaintext = "",
+		.psize	= 0,
+		.digest	= "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55"
+			  "\xbf\xef\x95\x60\x18\x90\xaf\xd8\x07\x09",
+	}, {
+		.plaintext = "abc",
+		.psize	= 3,
+		.digest	= "\xa9\x99\x3e\x36\x47\x06\x81\x6a\xba\x3e"
+			  "\x25\x71\x78\x50\xc2\x6c\x9c\xd0\xd8\x9d",
+	}, {
+		.plaintext = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+		.psize	= 56,
+		.digest	= "\x84\x98\x3e\x44\x1c\x3b\xd2\x6e\xba\xae"
+			  "\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1",
+		.np	= 2,
+		.tap	= { 28, 28 }
+	}, {
+		.plaintext = "\xec\x29\x56\x12\x44\xed\xe7\x06"
+			     "\xb6\xeb\x30\xa1\xc3\x71\xd7\x44"
+			     "\x50\xa1\x05\xc3\xf9\x73\x5f\x7f"
+			     "\xa9\xfe\x38\xcf\x67\xf3\x04\xa5"
+			     "\x73\x6a\x10\x6e\x92\xe1\x71\x39"
+			     "\xa6\x81\x3b\x1c\x81\xa4\xf3\xd3"
+			     "\xfb\x95\x46\xab\x42\x96\xfa\x9f"
+			     "\x72\x28\x26\xc0\x66\x86\x9e\xda"
+			     "\xcd\x73\xb2\x54\x80\x35\x18\x58"
+			     "\x13\xe2\x26\x34\xa9\xda\x44\x00"
+			     "\x0d\x95\xa2\x81\xff\x9f\x26\x4e"
+			     "\xcc\xe0\xa9\x31\x22\x21\x62\xd0"
+			     "\x21\xcc\xa2\x8d\xb5\xf3\xc2\xaa"
+			     "\x24\x94\x5a\xb1\xe3\x1c\xb4\x13"
+			     "\xae\x29\x81\x0f\xd7\x94\xca\xd5"
+			     "\xdf\xaf\x29\xec\x43\xcb\x38\xd1"
+			     "\x98\xfe\x4a\xe1\xda\x23\x59\x78"
+			     "\x02\x21\x40\x5b\xd6\x71\x2a\x53"
+			     "\x05\xda\x4b\x1b\x73\x7f\xce\x7c"
+			     "\xd2\x1c\x0e\xb7\x72\x8d\x08\x23"
+			     "\x5a\x90\x11",
+		.psize	= 163,
+		.digest	= "\x97\x01\x11\xc4\xe7\x7b\xcc\x88\xcc\x20"
+			  "\x45\x9c\x02\xb6\x9b\x4a\xa8\xf5\x82\x17",
+		.np	= 4,
+		.tap	= { 63, 64, 31, 5 }
+	}, {
+		.plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-",
+		.psize	= 64,
+		.digest = "\xc8\x71\xf6\x9a\x63\xcc\xa9\x84\x84\x82"
+			  "\x64\xe7\x79\x95\x5d\xd7\x19\x41\x7c\x91",
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\xb8\xe3\x54\xed\xc5\xfc\xef\xa4"
+			     "\x55\x73\x4a\x81\x99\xe4\x47\x2a"
+			     "\x30\xd6\xc9\x85",
 	}
 };
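
The 56-byte and 163-byte SHA-1 vectors above also carry .np/.tap annotations; as the test manager consumes them, .np is the number of scatterlist fragments the plaintext is split into, .tap[] lists each fragment's length (the entries sum to .psize), and .also_non_np requests an additional unsplit run. A simplified sketch of such a split (illustrative only, not the testmgr.c code; @data is assumed to come from kmalloc):

#include <linux/scatterlist.h>
#include <linux/types.h>

/*
 * Map @data onto @np scatterlist entries whose lengths come from @tap[],
 * e.g. { 63, 64, 31, 5 } for the 163-byte vector above.  The tap[]
 * entries must add up to the total buffer length.
 */
static void split_into_sg(struct scatterlist *sg, const u8 *data,
			  const unsigned int *tap, unsigned int np)
{
	unsigned int i, offset = 0;

	sg_init_table(sg, np);
	for (i = 0; i < np; i++) {
		sg_set_buf(&sg[i], data + offset, tap[i]);
		offset += tap[i];
	}
}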
 
@@ -12303,23 +12562,296 @@ static const struct cipher_testvec serpent_cbc_dec_tv_template[] = {
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 496 - 20, 4, 16 },
+	},
+};
+
+static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
+	{ /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
+			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
+			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
+			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
+			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
+			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.ilen	= 496,
+		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
+			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
+			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
+			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
+			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
+			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
+			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
+			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
+			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
+			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
+			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
+			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
+			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
+			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
+			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
+			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
+			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
+			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
+			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
+			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
+			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
+			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
+			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
+			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
+			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
+			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
+			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
+			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
+			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
+			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
+			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
+			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
+			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
+			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
+			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
+			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
+			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
+			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
+			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
+			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
+			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
+			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
+			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
+			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
+			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
+			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
+			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
+			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
+			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
+			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
+			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
+			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
+			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
+			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
+			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
+			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
+			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
+			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
+			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
+			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
+			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
+		.rlen	= 496,
+	}, { /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
+			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
+			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
+			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
+			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
+			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
+			  "\x2B\xC2\x59",
+		.ilen	= 499,
+		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
+			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
+			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
+			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
+			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
+			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
+			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
+			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
+			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
+			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
+			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
+			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
+			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
+			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
+			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
+			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
+			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
+			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
+			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
+			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
+			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
+			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
+			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
+			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
+			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
+			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
+			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
+			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
+			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
+			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
+			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
+			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
+			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
+			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
+			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
+			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
+			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
+			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
+			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
+			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
+			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
+			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
+			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
+			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
+			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
+			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
+			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
+			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
+			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
+			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
+			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
+			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
+			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
+			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
+			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
+			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
+			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
+			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
+			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
+			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
+			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
+			  "\x38\xE2\xE5",
+		.rlen	= 499,
 		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
-	{ /* Generated with Crypto++ */
+		.np	= 2,
+		.tap	= { 499 - 16, 16 },
+	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
 			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
 		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -12383,7 +12915,82 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
 		.ilen	= 496,
-		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+		.result	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
+			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
+			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
+			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
+			  "\xA6\xB9\xE4\xF4\xFA\xCA\xE5\x7E"
+			  "\x71\x28\x06\x4F\xE8\x08\x39\xDA"
+			  "\xA5\x0E\xC8\xC0\xB8\x16\xE5\x69"
+			  "\xE5\xCA\xEC\x4F\x63\x2C\xC0\x9B"
+			  "\x9F\x3E\x39\x79\xF0\xCD\x64\x35"
+			  "\x4A\xD3\xC8\xA9\x31\xCD\x48\x5B"
+			  "\x92\x3D\x8F\x3F\x96\xBD\xB3\x18"
+			  "\x74\x2A\x5D\x29\x3F\x57\x8F\xE2"
+			  "\x67\x9A\xE0\xE5\xD4\x4A\xE2\x47"
+			  "\xBC\xF6\xEB\x14\xF3\x8C\x20\xC2"
+			  "\x7D\xE2\x43\x81\x86\x72\x2E\xB1"
+			  "\x39\xF6\x95\xE1\x1F\xCB\x76\x33"
+			  "\x5B\x7D\x23\x0F\x3A\x67\x2A\x2F"
+			  "\xB9\x37\x9D\xDD\x1F\x16\xA1\x3C"
+			  "\x70\xFE\x52\xAA\x93\x3C\xC4\x46"
+			  "\xB1\xE5\xFF\xDA\xAF\xE2\x84\xFE"
+			  "\x25\x92\xB2\x63\xBD\x49\x77\xB4"
+			  "\x22\xA4\x6A\xD5\x04\xE0\x45\x58"
+			  "\x1C\x34\x96\x7C\x03\x0C\x13\xA2"
+			  "\x05\x22\xE2\xCB\x5A\x35\x03\x09"
+			  "\x40\xD2\x82\x05\xCA\x58\x73\xF2"
+			  "\x29\x5E\x01\x47\x13\x32\x78\xBE"
+			  "\x06\xB0\x51\xDB\x6C\x31\xA0\x1C"
+			  "\x74\xBC\x8D\x25\xDF\xF8\x65\xD1"
+			  "\x38\x35\x11\x26\x4A\xB4\x06\x32"
+			  "\xFA\xD2\x07\x77\xB3\x74\x98\x80"
+			  "\x61\x59\xA8\x9F\xF3\x6F\x2A\xBF"
+			  "\xE6\xA5\x9A\xC4\x6B\xA6\x49\x6F"
+			  "\xBC\x47\xD9\xFB\xC6\xEF\x25\x65"
+			  "\x96\xAC\x9F\xE4\x81\x4B\xD8\xBA"
+			  "\xD6\x9B\xC9\x6D\x58\x40\x81\x02"
+			  "\x73\x44\x4E\x43\x6E\x37\xBB\x11"
+			  "\xE3\xF9\xB8\x2F\xEC\x76\x34\xEA"
+			  "\x90\xCD\xB7\x2E\x0E\x32\x71\xE8"
+			  "\xBB\x4E\x0B\x98\xA4\x17\x17\x5B"
+			  "\x07\xB5\x82\x3A\xC4\xE8\x42\x51"
+			  "\x5A\x4C\x4E\x7D\xBF\xC4\xC0\x4F"
+			  "\x68\xB8\xC6\x4A\x32\x6F\x0B\xD7"
+			  "\x85\xED\x6B\xFB\x72\xD2\xA5\x8F"
+			  "\xBF\xF9\xAC\x59\x50\xA8\x08\x70"
+			  "\xEC\xBD\x0A\xBF\xE5\x87\xA1\xC2"
+			  "\x92\x14\x78\xAF\xE8\xEA\x2E\xDD"
+			  "\xC1\x03\x9A\xAA\x89\x8B\x32\x46"
+			  "\x5B\x18\x27\xBA\x46\xAA\x64\xDE"
+			  "\xE3\xD5\xA3\xFC\x7B\x5B\x61\xDB"
+			  "\x7E\xDA\xEC\x30\x17\x19\xF8\x80"
+			  "\xB5\x5E\x27\xB5\x37\x3A\x1F\x28"
+			  "\x07\x73\xC3\x63\xCE\xFF\x8C\xFE"
+			  "\x81\x4E\xF8\x24\xF3\xB8\xC7\xE8"
+			  "\x16\x9A\xCC\x58\x2F\x88\x1C\x4B"
+			  "\xBB\x33\xA2\x73\xF0\x1C\x89\x0E"
+			  "\xDC\x34\x27\x89\x98\xCE\x1C\xA2"
+			  "\xD8\xB8\x90\xBE\xEC\x72\x28\x13"
+			  "\xAC\x7B\xF1\xD0\x7F\x7A\x28\x50"
+			  "\xB7\x99\x65\x8A\xC9\xC6\x21\x34"
+			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
+			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
+			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
+		.rlen	= 496,
+	},
+};
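
A note on the last serpent_ctr_enc_tv_template entry: its IV starts the counter at FF..FD, so across the 31 blocks of the 496-byte message the 128-bit big-endian counter runs past FF..FF and wraps to zero, exercising the overflow path. The increment CTR mode relies on looks roughly like the helper below (the kernel provides crypto_inc() for the real thing; this is only an illustration and assumes u8 from <linux/types.h>):

/* Big-endian increment of a 16-byte CTR counter block. */
static void ctr128_inc(u8 ctr[16])
{
	int i;

	for (i = 15; i >= 0; i--) {
		ctr[i]++;
		if (ctr[i])	/* no carry out of this byte: done */
			break;
	}
}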
+
+static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
+	{ /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
@@ -12445,16 +13052,8 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
-		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ilen	= 496,
+		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12515,10 +13114,17 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59",
-		.ilen	= 499,
-		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
+	}, { /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
@@ -12581,19 +13187,8 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
 			  "\x38\xE2\xE5",
-		.rlen	= 499,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ilen	= 499,
+		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12654,9 +13249,21 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
+			  "\x2B\xC2\x59",
+		.rlen	= 499,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 499 - 16, 16 },
+	}, { /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
+		.input	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
 			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
 			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
 			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
@@ -12718,81 +13325,6 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
 			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
 			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
-		.rlen	= 496,
-	},
-};
-
-static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
-			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
-			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
-			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
-			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
-			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
-			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
-			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
-			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
-			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
-			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
-			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
-			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
-			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
-			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
-			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
-			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
-			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
-			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
-			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
-			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
-			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
-			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
-			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
-			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
-			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
-			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
-			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
-			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
-			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
-			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
-			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
-			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
-			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
-			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
-			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
-			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
-			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
-			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
-			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
-			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
-			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
-			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
-			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
-			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
-			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
-			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
-			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
-			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
-			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
-			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
-			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
-			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
-			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
-			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
-			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
-			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
-			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
-			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
-			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
-			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
-			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
 		.ilen	= 496,
 		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
@@ -12857,795 +13389,2156 @@ static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
 		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+	},
+};
+
+static const struct cipher_testvec serpent_lrw_enc_tv_template[] = {
+	/* Generated from AES-LRW test vectors */
+	{
+		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
+			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
+			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
+			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
+			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
+		.rlen	= 16,
+	}, {
+		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
+			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
+			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
+			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x02",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
+			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
+			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
+			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
+			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
+			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
+		.rlen	= 16,
+	}, {
+		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
+			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
+			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
+			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
+			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
+			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
+		.rlen	= 16,
+	}, {
+		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
+			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
+			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
+			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
+			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
+			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
+			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
+		.rlen	= 16,
+	}, {
+		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
+			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
+			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
+			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
+			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
+			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
+			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
+			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
+			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
+			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
+			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
+			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
+			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
+			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
+			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
+			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
+			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
+			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
+			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
+			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
+			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
+			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
+			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
+			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
+			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
+			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
+			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
+			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
+			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
+			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
+			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
+			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
+			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
+			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
+			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
+			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
+			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
+			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
+			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
+			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
+			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
+			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
+			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
+			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
+			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
+			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
+			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
+			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
+			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
+			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
+			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
+			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
+			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
+			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
+			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
+			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
+			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
+			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
+			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
+			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
+			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
+			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
+			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
+			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
+			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
+			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
+			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
+			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
+			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
+		.ilen	= 512,
+		.result	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
+			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
+			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
+			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
+			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
+			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
+			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
+			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
+			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
+			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
+			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
+			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
+			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
+			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
+			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
+			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
+			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
+			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
+			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
+			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
+			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
+			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
+			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
+			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
+			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
+			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
+			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
+			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
+			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
+			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
+			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
+			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
+			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
+			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
+			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
+			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
+			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
+			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
+			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
+			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
+			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
+			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
+			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
+			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
+			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
+			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
+			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
+			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
+			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
+			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
+			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
+			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
+			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
+			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
+			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
+			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
+			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
+			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
+			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
+			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
+			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
+			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
+			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
+			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	},
+};
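
The serpent LRW vectors above use 32-, 40- and 48-byte keys because the lrw template treats the trailing 16 bytes as the tweak key and hands the remainder to the wrapped cipher (Serpent here), so klen is always the Serpent key size plus 16. A condensed sketch of that split (not the crypto/lrw.c implementation; the constant and helper names are illustrative):

#include <linux/errno.h>
#include <linux/types.h>

#define LRW_TWEAK_KEY_SIZE	16	/* illustrative constant */

/* Split @key into the underlying cipher key and the 16-byte tweak key. */
static int lrw_split_key(const u8 *key, unsigned int keylen,
			 const u8 **cipher_key, unsigned int *cipher_keylen,
			 const u8 **tweak_key)
{
	if (keylen <= LRW_TWEAK_KEY_SIZE)
		return -EINVAL;

	*cipher_keylen = keylen - LRW_TWEAK_KEY_SIZE;
	*cipher_key = key;
	*tweak_key = key + *cipher_keylen;
	return 0;
}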
+
+static const struct cipher_testvec serpent_lrw_dec_tv_template[] = {
+	/* Generated from AES-LRW test vectors */
+	/* same as enc vectors with input and result reversed */
+	{
+		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
+			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
+			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
+			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
+			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
+			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
+			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
+			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x02",
+		.input	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
+			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
+			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
+			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
+			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
+			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
+			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
+			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
+			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
+			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
+			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
+			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
+			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
+			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
+			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
+			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
+			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
+			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
+			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
+			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
+			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
+			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
+			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
+			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
+			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
+			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
+			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
+			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
+			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
+			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
+			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
+			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
+			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
+			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
+			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
+			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
+			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
+			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
+			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
+			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
+			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
+			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
+			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
+			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
+			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
+			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
+			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
+			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
+			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
+			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
+			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
+			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
+			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
+			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
+			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
+			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
+			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
+			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
+			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
+			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
+			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
+			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
+			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
+			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
+			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
+			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
+			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
+			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
+			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
+			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
+			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
+			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
+			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
+			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
+			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
+			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
+			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
+			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
+			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
+			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
+			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
+			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
+			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
+			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
+			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
+			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
+		.ilen	= 512,
+		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
+			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
+			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
+			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
+			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
+			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
+			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
+			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
+			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
+			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
+			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
+			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
+			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
+			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
+			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
+			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
+			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
+			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
+			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
+			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
+			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
+			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
+			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
+			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
+			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
+			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
+			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
+			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
+			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
+			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
+			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
+			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
+			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
+			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
+			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
+			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
+			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
+			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
+			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
+			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
+			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
+			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
+			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
+			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
+			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
+			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
+			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
+			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
+			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
+			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
+			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
+			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
+			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
+			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
+			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
+			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
+			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
+			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
+			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
+			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
+			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
+			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
+			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	},
+};
+
+static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
+	/* Generated from AES-XTS test vectors */
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
-			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
-			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
-			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
-			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
-			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
-			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
-			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
-			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
-			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
-			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
-			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
-			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
-			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
-			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
-			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
-			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
-			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
-			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
-			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
-			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
-			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
-			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
-			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
-			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
-			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
-			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
-			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
-			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
-			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
-			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
-			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
-			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
-			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
-			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
-			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
-			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
-			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
-			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
-			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
-			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
-			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
-			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
-			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
-			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
-			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
-			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
-			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
-			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
-			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
-			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
-			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
-			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
-			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
-			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
-			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
-			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
-			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
-			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
-			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
-			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
-			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
-			  "\x38\xE2\xE5",
-		.ilen	= 499,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59",
-		.rlen	= 499,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 32,
+		.result	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
+			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
+			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
+			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.rlen	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
+			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
+			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
+			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
+			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
+			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
+			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
+			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
+			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
+			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
+			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
+			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
+			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
+			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
+			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
+			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
+			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
+			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
+			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
+			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
+			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
+			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
+			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
+			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
+			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
+			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
+			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
+			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
+			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
+			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
+			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
+			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
+			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
+			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
+			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
+			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
+			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
+			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
+			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
+			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
+			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
+			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
+			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
+			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
+			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
+			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
+			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
+			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
+			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
+			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
+			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
+			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
+			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
+			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
+			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
+			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
+			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
+			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
+			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
+			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
+			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
+			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
+			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
+			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
+			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
+			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
+			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
+			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
+			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
+			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+		.rlen	= 512,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
+			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
+			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
+			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
+			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
+			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
+			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
+			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
+			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
+			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
+			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
+			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
+			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
+			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
+			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
+			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
+			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
+			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
+			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
+			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
+			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
+			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
+			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
+			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
+			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
+			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
+			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
+			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
+			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
+			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
+			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
+			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
+			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
+			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
+			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
+			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
+			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
+			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
+			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
+			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
+			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
+			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
+			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
+			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
+			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
+			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
+			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
+			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
+			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
+			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
+			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
+			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
+			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
+			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
+			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
+			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
+			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
+			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
+			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
+			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
+			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
+			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
+			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
+			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.rlen	= 512,
 		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
-			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
-			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
-			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
-			  "\xA6\xB9\xE4\xF4\xFA\xCA\xE5\x7E"
-			  "\x71\x28\x06\x4F\xE8\x08\x39\xDA"
-			  "\xA5\x0E\xC8\xC0\xB8\x16\xE5\x69"
-			  "\xE5\xCA\xEC\x4F\x63\x2C\xC0\x9B"
-			  "\x9F\x3E\x39\x79\xF0\xCD\x64\x35"
-			  "\x4A\xD3\xC8\xA9\x31\xCD\x48\x5B"
-			  "\x92\x3D\x8F\x3F\x96\xBD\xB3\x18"
-			  "\x74\x2A\x5D\x29\x3F\x57\x8F\xE2"
-			  "\x67\x9A\xE0\xE5\xD4\x4A\xE2\x47"
-			  "\xBC\xF6\xEB\x14\xF3\x8C\x20\xC2"
-			  "\x7D\xE2\x43\x81\x86\x72\x2E\xB1"
-			  "\x39\xF6\x95\xE1\x1F\xCB\x76\x33"
-			  "\x5B\x7D\x23\x0F\x3A\x67\x2A\x2F"
-			  "\xB9\x37\x9D\xDD\x1F\x16\xA1\x3C"
-			  "\x70\xFE\x52\xAA\x93\x3C\xC4\x46"
-			  "\xB1\xE5\xFF\xDA\xAF\xE2\x84\xFE"
-			  "\x25\x92\xB2\x63\xBD\x49\x77\xB4"
-			  "\x22\xA4\x6A\xD5\x04\xE0\x45\x58"
-			  "\x1C\x34\x96\x7C\x03\x0C\x13\xA2"
-			  "\x05\x22\xE2\xCB\x5A\x35\x03\x09"
-			  "\x40\xD2\x82\x05\xCA\x58\x73\xF2"
-			  "\x29\x5E\x01\x47\x13\x32\x78\xBE"
-			  "\x06\xB0\x51\xDB\x6C\x31\xA0\x1C"
-			  "\x74\xBC\x8D\x25\xDF\xF8\x65\xD1"
-			  "\x38\x35\x11\x26\x4A\xB4\x06\x32"
-			  "\xFA\xD2\x07\x77\xB3\x74\x98\x80"
-			  "\x61\x59\xA8\x9F\xF3\x6F\x2A\xBF"
-			  "\xE6\xA5\x9A\xC4\x6B\xA6\x49\x6F"
-			  "\xBC\x47\xD9\xFB\xC6\xEF\x25\x65"
-			  "\x96\xAC\x9F\xE4\x81\x4B\xD8\xBA"
-			  "\xD6\x9B\xC9\x6D\x58\x40\x81\x02"
-			  "\x73\x44\x4E\x43\x6E\x37\xBB\x11"
-			  "\xE3\xF9\xB8\x2F\xEC\x76\x34\xEA"
-			  "\x90\xCD\xB7\x2E\x0E\x32\x71\xE8"
-			  "\xBB\x4E\x0B\x98\xA4\x17\x17\x5B"
-			  "\x07\xB5\x82\x3A\xC4\xE8\x42\x51"
-			  "\x5A\x4C\x4E\x7D\xBF\xC4\xC0\x4F"
-			  "\x68\xB8\xC6\x4A\x32\x6F\x0B\xD7"
-			  "\x85\xED\x6B\xFB\x72\xD2\xA5\x8F"
-			  "\xBF\xF9\xAC\x59\x50\xA8\x08\x70"
-			  "\xEC\xBD\x0A\xBF\xE5\x87\xA1\xC2"
-			  "\x92\x14\x78\xAF\xE8\xEA\x2E\xDD"
-			  "\xC1\x03\x9A\xAA\x89\x8B\x32\x46"
-			  "\x5B\x18\x27\xBA\x46\xAA\x64\xDE"
-			  "\xE3\xD5\xA3\xFC\x7B\x5B\x61\xDB"
-			  "\x7E\xDA\xEC\x30\x17\x19\xF8\x80"
-			  "\xB5\x5E\x27\xB5\x37\x3A\x1F\x28"
-			  "\x07\x73\xC3\x63\xCE\xFF\x8C\xFE"
-			  "\x81\x4E\xF8\x24\xF3\xB8\xC7\xE8"
-			  "\x16\x9A\xCC\x58\x2F\x88\x1C\x4B"
-			  "\xBB\x33\xA2\x73\xF0\x1C\x89\x0E"
-			  "\xDC\x34\x27\x89\x98\xCE\x1C\xA2"
-			  "\xD8\xB8\x90\xBE\xEC\x72\x28\x13"
-			  "\xAC\x7B\xF1\xD0\x7F\x7A\x28\x50"
-			  "\xB7\x99\x65\x8A\xC9\xC6\x21\x34"
-			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
-			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
-			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec serpent_lrw_enc_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
+static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
+	/* Generated from AES-XTS test vectors */
+	/* same as enc vectors with input and result reversed */
 	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
-			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
+			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
+			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
+			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.ilen	= 32,
+		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.rlen	= 32,
 	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
 		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
-			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
-		.rlen	= 16,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
+			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
+			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
+			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
 	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
+			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
+			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
+			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
-			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
+			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
+			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
+			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
+			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
+			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
+			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
+			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
+			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
+			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
+			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
+			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
+			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
+			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
+			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
+			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
+			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
+			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
+			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
+			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
+			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
+			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
+			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
+			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
+			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
+			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
+			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
+			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
+			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
+			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
+			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
+			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
+			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
+			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
+			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
+			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
+			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
+			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
+			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
+			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
+			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
+			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
+			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
+			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
+			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
+			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
+			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
+			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
+			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
+			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
+			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
+			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
+			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
+			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
+			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
+			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
+			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
+			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
+			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
+			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
+			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
+			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
+			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
+			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
 	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
+			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
+			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
+			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
+			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
+			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
+			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
+			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
+			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
+			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
+			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
+			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
+			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
+			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
+			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
+			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
+			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
+			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
+			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
+			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
+			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
+			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
+			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
+			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
+			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
+			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
+			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
+			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
+			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
+			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
+			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
+			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
+			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
+			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
+			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
+			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
+			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
+			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
+			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
+			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
+			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
+			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
+			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
+			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
+			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
+			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
+			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
+			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
+			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
+			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
+			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
+			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
+			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
+			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
+			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
+			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
+			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
+			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
+			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
+			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
+			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
+			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
+			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
+			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	},
+};
+
+/*
+ * SM4 test vectors taken from the draft RFC
+ * https://tools.ietf.org/html/draft-crypto-sm4-00#ref-GBT.32907-2016
+ */
+
+static const struct cipher_testvec sm4_enc_tv_template[] = {
+	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.ilen	= 16,
-		.result	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
-			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
+		.result	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
 		.rlen	= 16,
-	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+	}, { /*
+	      *  SM4 Appendix A: Example Calculations.
+	      *  Last 10 iterations of Example 2.
+	      */
+		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+			  "\x81\xfc\xa8\x0e\x38\x3e\xef\x80"
+			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
+		.ilen	= 160,
+		.result	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
+			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
+			  "\x04\x02\xf8\x04\xc3\x3d\x3f\x66",
+		.rlen	= 160,
+	},
+};
+
+static const struct cipher_testvec sm4_dec_tv_template[] = {
+	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
 		.ilen	= 16,
-		.result	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
-			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
+		.result	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+	}, { /*
+	      *  SM4 Appendix A: Example Calculations.
+	      *  Last 10 iterations of Example 2.
+	      */
+		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
+			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
+			  "\x04\x02\xf8\x04\xc3\x3d\x3f\x66",
+		.ilen	= 160,
+		.result	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+			  "\x81\xfc\xa8\x0e\x38\x3e\xef\x80"
+			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
+		.rlen	= 160,
+	},
+};
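Editorial note: the two SM4 templates above exercise the bare block cipher one 16-byte block at a time; in the 160-byte "Example 2" entries, each .result block reappears as the following .input block, so the vector simply chains ten single-block encryptions. As a rough illustration only (not part of this patch), the first Appendix A vector could be reproduced through the kernel's single-block cipher API roughly as follows, assuming the generic implementation from this series registers under the name "sm4"; the function name is invented for the example.

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

/*
 * Illustrative sketch only: encrypt one block of the first SM4 vector
 * above and compare against the expected ciphertext.
 */
static int sm4_check_example1(void)
{
	static const u8 key[16] = {
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
	};
	static const u8 expected[16] = {
		0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e,
		0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46,
	};
	struct crypto_cipher *tfm;
	u8 out[16];
	int err;

	tfm = crypto_alloc_cipher("sm4", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, sizeof(key));
	if (!err) {
		/* In Example 1 the plaintext is the key value itself. */
		crypto_cipher_encrypt_one(tfm, out, key);
		err = memcmp(out, expected, sizeof(out)) ? -EINVAL : 0;
	}

	crypto_free_cipher(tfm);
	return err;
}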
+
+/*
+ * Speck test vectors taken from the original paper:
+ * "The Simon and Speck Families of Lightweight Block Ciphers"
+ * https://eprint.iacr.org/2013/404.pdf
+ *
+ * Note that the paper does not make byte and word order clear.  But it was
+ * confirmed with the authors that the intended orders are little endian byte
+ * order and (y, x) word order.  Equivalently, the printed test vectors, when
+ * looking at only the bytes (ignoring the whitespace that divides them into
+ * words), are backwards: the left-most byte is actually the one with the
+ * highest memory address, while the right-most byte is actually the one with
+ * the lowest memory address.
+ */
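Editorial illustration of the ordering described above (not part of this patch): the templates below store the paper's second printed word (y) little endian, followed by the first printed word (x) little endian, which amounts to reversing all sixteen printed bytes. A helper of this shape, built on the existing put_unaligned_le64(), would turn the paper's (x, y) words into the byte strings used here; the function name is invented for illustration.

#include <asm/unaligned.h>
#include <linux/types.h>

/* Map a paper-notation (x, y) Speck128 vector to template byte order. */
static void speck128_paper_words_to_bytes(u64 x, u64 y, u8 bytes[16])
{
	put_unaligned_le64(y, bytes);		/* bytes 0..7:  y, little endian */
	put_unaligned_le64(x, bytes + 8);	/* bytes 8..15: x, little endian */
}

For example, reading the first Speck128/128 vector's .input bytes below in reverse gives x = 0x6c61766975716520 and y = 0x7469206564616d20; feeding those two words through the helper reproduces that .input string exactly.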
+
+static const struct cipher_testvec speck128_enc_tv_template[] = {
+	{ /* Speck128/128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.input	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
 		.ilen	= 16,
-		.result	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
-			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
+		.result	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
 		.rlen	= 16,
-	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+	}, { /* Speck128/192 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17",
+		.klen	= 24,
+		.input	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+			  "\x68\x69\x65\x66\x20\x48\x61\x72",
 		.ilen	= 16,
-		.result	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
-			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
+		.result	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
+		.rlen	= 16,
+	}, { /* Speck128/256 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.klen	= 32,
+		.input	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
+		.ilen	= 16,
+		.result	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
 		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen	= 512,
-		.result	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
-			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
-			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
-			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
-			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
-			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
-			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
-			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
-			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
-			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
-			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
-			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
-			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
-			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
-			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
-			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
-			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
-			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
-			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
-			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
-			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
-			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
-			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
-			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
-			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
-			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
-			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
-			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
-			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
-			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
-			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
-			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
-			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
-			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
-			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
-			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
-			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
-			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
-			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
-			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
-			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
-			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
-			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
-			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
-			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
-			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
-			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
-			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
-			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
-			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
-			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
-			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
-			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
-			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
-			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
-			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
-			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
-			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
-			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
-			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
-			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
-			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
-			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
-			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec serpent_lrw_dec_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+static const struct cipher_testvec speck128_dec_tv_template[] = {
+	{ /* Speck128/128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.input	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
+		.ilen	= 16,
+		.result	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
+		.rlen	= 16,
+	}, { /* Speck128/192 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17",
+		.klen	= 24,
+		.input	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
+		.ilen	= 16,
+		.result	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+			  "\x68\x69\x65\x66\x20\x48\x61\x72",
+		.rlen	= 16,
+	}, { /* Speck128/256 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
-			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
+		.input	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
 		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.result	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
 		.rlen	= 16,
+	},
+};
+
+/*
+ * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
+ * result recomputed with Speck128 as the cipher
+ */
+
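For reference, the recomputation described in the comment above can be reproduced in userspace. The sketch below is not the kernel code: it implements the Speck128 block cipher as published (Beaulieu et al.) together with the standard XTS construction (first key half for data, second half for the tweak, tweak doubled in GF(2^128) with the little-endian 0x87 convention), and it assumes the byte ordering implied by the single-block Speck128 vectors above, i.e. each 16-byte block is two little-endian 64-bit words with the second word of the (x, y) pair stored first. The main() only checks the Speck128/128 single-block vector; the 512-byte XTS results are not asserted, since matching the kernel glue's exact XTS convention is an assumption here.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t rol64(uint64_t x, unsigned r) { return (x << r) | (x >> (64 - r)); }
static uint64_t ror64(uint64_t x, unsigned r) { return (x >> r) | (x << (64 - r)); }

static uint64_t get_le64(const uint8_t *p)
{
	uint64_t v = 0;
	for (int i = 7; i >= 0; i--)
		v = (v << 8) | p[i];
	return v;
}

static void put_le64(uint8_t *p, uint64_t v)
{
	for (int i = 0; i < 8; i++)
		p[i] = (uint8_t)(v >> (8 * i));
}

/*
 * One-block Speck128 encryption (keylen 16, 24 or 32 -> 32, 33 or 34 rounds).
 * The 16-byte block is treated as two little-endian 64-bit words with the
 * "y" word first, which is the layout implied by the vectors above.
 */
static void speck128_encrypt(uint8_t out[16], const uint8_t in[16],
			     const uint8_t *key, unsigned keylen)
{
	unsigned m = keylen / 8, T = 30 + m;
	uint64_t l[40], k = get_le64(key);
	uint64_t y = get_le64(in), x = get_le64(in + 8);

	for (unsigned i = 0; i < m - 1; i++)
		l[i] = get_le64(key + 8 * (i + 1));
	for (unsigned i = 0; i < T; i++) {
		x = (ror64(x, 8) + y) ^ k;			/* round function */
		y = rol64(y, 3) ^ x;
		l[i + m - 1] = (ror64(l[i], 8) + k) ^ i;	/* key schedule */
		k = rol64(k, 3) ^ l[i + m - 1];
	}
	put_le64(out, y);
	put_le64(out + 8, x);
}

/* Double the tweak in GF(2^128), little-endian XTS convention (0x87). */
static void gf128_mul_x(uint8_t t[16])
{
	uint8_t carry = 0;
	for (int i = 0; i < 16; i++) {
		uint8_t c = t[i] >> 7;
		t[i] = (uint8_t)((t[i] << 1) | carry);
		carry = c;
	}
	if (carry)
		t[0] ^= 0x87;
}

/* XTS with Speck128; every .ilen above is a multiple of 16, so no stealing. */
static void speck128_xts_encrypt(uint8_t *out, const uint8_t *in, size_t len,
				 const uint8_t *key, unsigned keylen,
				 const uint8_t iv[16])
{
	uint8_t t[16], buf[16];

	speck128_encrypt(t, iv, key + keylen / 2, keylen / 2);	/* tweak key */
	for (size_t off = 0; off < len; off += 16) {
		for (int i = 0; i < 16; i++)
			buf[i] = in[off + i] ^ t[i];
		speck128_encrypt(buf, buf, key, keylen / 2);	/* data key */
		for (int i = 0; i < 16; i++)
			out[off + i] = buf[i] ^ t[i];
		gf128_mul_x(t);
	}
}

int main(void)
{
	/* Speck128/128 single-block vector (see speck128_dec_tv_template). */
	static const uint8_t key[16] = "\x00\x01\x02\x03\x04\x05\x06\x07"
				       "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f";
	static const uint8_t pt[16]  = "\x20\x6d\x61\x64\x65\x20\x69\x74"
				       "\x20\x65\x71\x75\x69\x76\x61\x6c";
	static const uint8_t ct[16]  = "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
				       "\x65\x32\x78\x79\x51\x98\x5d\xa6";
	uint8_t out[16];

	speck128_encrypt(out, pt, key, sizeof(key));
	printf("speck128/128 block: %s\n", memcmp(out, ct, 16) ? "MISMATCH" : "OK");
	return 0;
}
```

Under those assumptions, compiling and running the sketch should print "OK"; regenerating a .result field would then be a single speck128_xts_encrypt() call over the .input bytes with the corresponding .key and .iv.
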
+static const struct cipher_testvec speck128_xts_enc_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 32,
+		.result	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
+			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
+			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
+		.rlen	= 32,
 	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
+			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
+			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
+			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
+			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
-			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
+			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
+			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
+			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
+			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
+			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
+			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
+			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
+			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
+			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
+			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
+			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
+			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
+			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
+			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
+			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
+			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
+			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
+			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
+			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
+			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
+			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
+			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
+			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
+			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
+			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
+			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
+			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
+			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
+			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
+			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
+			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
+			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
+			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
+			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
+			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
+			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
+			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
+			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
+			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
+			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
+			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
+			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
+			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
+			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
+			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
+			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
+			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
+			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
+			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
+			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
+			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
+			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
+			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
+			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
+			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
+			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
+			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
+			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
+			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
+			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
+			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
+			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
+		.rlen	= 512,
 	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
+			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
+			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
+			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
+			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
+			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
+			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
+			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
+			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
+			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
+			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
+			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
+			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
+			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
+			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
+			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
+			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
+			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
+			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
+			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
+			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
+			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
+			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
+			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
+			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
+			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
+			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
+			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
+			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
+			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
+			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
+			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
+			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
+			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
+			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
+			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
+			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
+			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
+			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
+			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
+			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
+			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
+			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
+			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
+			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
+			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
+			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
+			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
+			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
+			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
+			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
+			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
+			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
+			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
+			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
+			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
+			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
+			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
+			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
+			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
+			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
+			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
+			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck128_xts_dec_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
-			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
-			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
+			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
+			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
+		.ilen	= 32,
+		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.rlen	= 32,
 	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
-			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
+			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
+			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
 	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
-			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
+			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
+			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
 	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
-			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
+			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
+			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
+			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
+			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
+			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
+			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
+			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
+			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
+			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
+			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
+			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
+			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
+			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
+			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
+			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
+			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
+			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
+			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
+			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
+			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
+			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
+			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
+			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
+			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
+			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
+			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
+			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
+			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
+			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
+			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
+			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
+			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
+			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
+			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
+			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
+			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
+			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
+			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
+			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
+			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
+			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
+			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
+			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
+			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
+			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
+			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
+			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
+			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
+			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
+			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
+			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
+			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
+			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
+			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
+			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
+			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
+			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
+			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
+			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
+			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
+			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
+			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
 	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
-			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
-			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
-			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
-			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
-			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
-			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
-			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
-			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
-			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
-			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
-			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
-			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
-			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
-			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
-			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
-			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
-			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
-			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
-			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
-			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
-			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
-			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
-			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
-			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
-			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
-			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
-			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
-			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
-			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
-			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
-			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
-			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
-			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
-			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
-			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
-			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
-			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
-			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
-			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
-			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
-			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
-			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
-			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
-			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
-			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
-			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
-			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
-			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
-			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
-			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
-			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
-			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
-			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
-			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
-			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
-			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
-			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
-			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
-			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
-			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
-			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
-			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
-			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
+			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
+			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
+			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
+			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
+			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
+			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
+			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
+			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
+			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
+			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
+			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
+			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
+			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
+			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
+			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
+			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
+			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
+			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
+			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
+			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
+			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
+			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
+			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
+			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
+			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
+			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
+			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
+			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
+			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
+			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
+			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
+			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
+			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
+			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
+			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
+			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
+			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
+			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
+			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
+			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
+			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
+			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
+			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
+			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
+			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
+			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
+			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
+			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
+			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
+			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
+			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
+			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
+			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
+			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
+			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
+			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
+			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
+			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
+			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
+			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
+			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
+			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
 		.ilen	= 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
 		.rlen	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck64_enc_tv_template[] = {
+	{ /* Speck64/96 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13",
+		.klen	= 12,
+		.input	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.ilen	= 8,
+		.result	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.rlen	= 8,
+	}, { /* Speck64/128 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+		.klen	= 16,
+		.input	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.ilen	= 8,
+		.result	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.rlen	= 8,
 	},
 };
 
-static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
+static const struct cipher_testvec speck64_dec_tv_template[] = {
+	{ /* Speck64/96 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13",
+		.klen	= 12,
+		.input	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.ilen	= 8,
+		.result	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.rlen	= 8,
+	}, { /* Speck64/128 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+		.klen	= 16,
+		.input	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.ilen	= 8,
+		.result	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.rlen	= 8,
+	},
+};
+
+/*
+ * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result
+ * recomputed with Speck64 as the cipher, and key lengths adjusted
+ */
+
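Speck64 is the same ARX construction on 32-bit words (rotations by 8 and 3 again; 26 rounds for Speck64/96, 27 for Speck64/128), and the 24- and 32-byte XTS keys below presumably split into two 12- or 16-byte Speck64 keys, which would be the "key lengths adjusted" noted in the comment above. The following is a minimal standalone sketch of the block primitive only, again assuming the little-endian, y-word-first layout implied by the single-block Speck64 vectors above; it is not the kernel implementation, and the Speck64-XTS tweak handling is intentionally left out.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t rol32(uint32_t x, unsigned r) { return (x << r) | (x >> (32 - r)); }
static uint32_t ror32(uint32_t x, unsigned r) { return (x >> r) | (x << (32 - r)); }

static uint32_t get_le32(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

static void put_le32(uint8_t *p, uint32_t v)
{
	for (int i = 0; i < 4; i++)
		p[i] = (uint8_t)(v >> (8 * i));
}

/*
 * One-block Speck64 encryption (keylen 12 or 16 bytes -> 26 or 27 rounds).
 * The 8-byte block is two little-endian 32-bit words, "y" word first.
 */
static void speck64_encrypt(uint8_t out[8], const uint8_t in[8],
			    const uint8_t *key, unsigned keylen)
{
	unsigned m = keylen / 4, T = 23 + m;
	uint32_t l[32], k = get_le32(key);
	uint32_t y = get_le32(in), x = get_le32(in + 4);

	for (unsigned i = 0; i < m - 1; i++)
		l[i] = get_le32(key + 4 * (i + 1));
	for (unsigned i = 0; i < T; i++) {
		x = (ror32(x, 8) + y) ^ k;			/* round function */
		y = rol32(y, 3) ^ x;
		l[i + m - 1] = (ror32(l[i], 8) + k) ^ i;	/* key schedule */
		k = rol32(k, 3) ^ l[i + m - 1];
	}
	put_le32(out, y);
	put_le32(out + 4, x);
}

int main(void)
{
	/* Speck64/96 single-block vector (see speck64_enc_tv_template above). */
	static const uint8_t key[12] = "\x00\x01\x02\x03\x08\x09\x0a\x0b"
				       "\x10\x11\x12\x13";
	static const uint8_t pt[8]   = "\x65\x61\x6e\x73\x20\x46\x61\x74";
	static const uint8_t ct[8]   = "\x6c\x94\x75\x41\xec\x52\x79\x9f";
	uint8_t out[8];

	speck64_encrypt(out, pt, key, sizeof(key));
	printf("speck64/96 block: %s\n", memcmp(out, ct, 8) ? "MISMATCH" : "OK");
	return 0;
}
```

The Speck64/128 entry above can be checked the same way by switching to the 16-byte key and its input/result pair.
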
+static const struct cipher_testvec speck64_xts_enc_tv_template[] = {
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -13653,17 +15546,16 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.ilen	= 32,
-		.result	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
-			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
-			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
-			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.result	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
+			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
+			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
 		.rlen	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -13671,17 +15563,16 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
 		.ilen	= 32,
-		.result	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
-			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
-			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
-			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.result	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
+			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
+			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
 		.rlen	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -13689,17 +15580,16 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
 		.ilen	= 32,
-		.result	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
-			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
-			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
-			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.result	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
+			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
+			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
 		.rlen	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
+			  "\x31\x41\x59\x26\x53\x58\x97\x93",
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
@@ -13749,99 +15639,95 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x60\x61\x62\x63\x64\x65\x66\x67"
 			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
 			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
-			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
-			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
-			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
-			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
-			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
-			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
-			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
-			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
-			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
-			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
-			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
-			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
-			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
-			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
-			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
-			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
-			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
-			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
-			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
-			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
-			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
-			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
-			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
-			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
-			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
-			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
-			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
-			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
-			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
-			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
-			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
-			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
-			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
-			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
-			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
-			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
-			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
-			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
-			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
-			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
-			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
-			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
-			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
-			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
-			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
-			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
-			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
-			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
-			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
-			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
-			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
-			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
-			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
-			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
-			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
-			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
-			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
-			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
-			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
-			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
-			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
-			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
-			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
+			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
+			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
+			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
+			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
+			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
+			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
+			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
+			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
+			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
+			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
+			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
+			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
+			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
+			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
+			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
+			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
+			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
+			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
+			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
+			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
+			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
+			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
+			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
+			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
+			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
+			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
+			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
+			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
+			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
+			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
+			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
+			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
+			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
+			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
+			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
+			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
+			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
+			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
+			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
+			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
+			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
+			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
+			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
+			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
+			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
+			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
+			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
+			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
+			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
+			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
+			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
+			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
+			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
+			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
+			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
+			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
+			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
+			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
+			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
+			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
+			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
+			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
 		.rlen	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
 			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
+			  "\x99\x59\x57\x49\x66\x96\x76\x27",
+		.klen	= 32,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
@@ -13909,92 +15795,89 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
 		.ilen	= 512,
-		.result	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
-			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
-			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
-			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
-			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
-			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
-			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
-			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
-			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
-			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
-			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
-			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
-			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
-			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
-			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
-			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
-			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
-			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
-			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
-			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
-			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
-			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
-			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
-			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
-			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
-			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
-			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
-			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
-			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
-			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
-			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
-			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
-			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
-			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
-			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
-			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
-			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
-			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
-			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
-			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
-			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
-			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
-			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
-			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
-			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
-			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
-			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
-			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
-			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
-			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
-			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
-			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
-			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
-			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
-			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
-			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
-			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
-			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
-			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
-			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
-			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
-			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
-			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
-			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.result	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
+			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
+			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
+			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
+			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
+			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
+			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
+			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
+			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
+			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
+			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
+			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
+			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
+			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
+			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
+			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
+			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
+			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
+			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
+			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
+			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
+			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
+			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
+			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
+			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
+			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
+			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
+			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
+			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
+			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
+			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
+			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
+			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
+			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
+			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
+			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
+			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
+			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
+			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
+			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
+			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
+			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
+			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
+			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
+			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
+			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
+			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
+			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
+			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
+			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
+			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
+			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
+			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
+			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
+			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
+			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
+			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
+			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
+			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
+			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
+			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
+			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
+			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
 		.rlen	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
-	},
+	}
 };
 
-static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
-	/* same as enc vectors with input and result reversed */
+static const struct cipher_testvec speck64_xts_dec_tv_template[] = {
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
-			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
-			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
-			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.input	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
+			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
+			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
 		.ilen	= 32,
 		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -14004,15 +15887,14 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
-			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
-			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
-			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.input	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
+			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
+			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
 		.ilen	= 32,
 		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -14022,15 +15904,14 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
-			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
-			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
-			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.input	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
+			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
+			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
 		.ilen	= 32,
 		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -14040,75 +15921,74 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
+			  "\x31\x41\x59\x26\x53\x58\x97\x93",
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
-			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
-			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
-			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
-			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
-			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
-			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
-			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
-			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
-			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
-			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
-			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
-			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
-			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
-			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
-			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
-			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
-			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
-			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
-			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
-			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
-			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
-			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
-			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
-			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
-			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
-			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
-			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
-			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
-			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
-			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
-			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
-			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
-			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
-			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
-			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
-			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
-			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
-			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
-			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
-			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
-			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
-			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
-			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
-			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
-			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
-			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
-			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
-			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
-			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
-			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
-			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
-			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
-			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
-			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
-			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
-			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
-			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
-			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
-			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
-			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
-			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
-			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
-			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+		.input	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
+			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
+			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
+			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
+			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
+			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
+			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
+			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
+			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
+			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
+			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
+			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
+			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
+			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
+			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
+			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
+			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
+			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
+			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
+			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
+			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
+			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
+			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
+			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
+			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
+			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
+			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
+			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
+			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
+			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
+			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
+			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
+			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
+			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
+			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
+			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
+			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
+			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
+			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
+			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
+			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
+			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
+			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
+			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
+			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
+			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
+			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
+			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
+			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
+			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
+			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
+			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
+			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
+			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
+			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
+			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
+			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
+			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
+			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
+			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
+			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
+			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
+			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
 		.ilen	= 512,
 		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
@@ -14179,78 +16059,74 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
 			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
+			  "\x99\x59\x57\x49\x66\x96\x76\x27",
+		.klen	= 32,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
-			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
-			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
-			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
-			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
-			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
-			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
-			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
-			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
-			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
-			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
-			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
-			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
-			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
-			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
-			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
-			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
-			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
-			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
-			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
-			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
-			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
-			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
-			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
-			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
-			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
-			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
-			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
-			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
-			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
-			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
-			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
-			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
-			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
-			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
-			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
-			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
-			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
-			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
-			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
-			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
-			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
-			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
-			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
-			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
-			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
-			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
-			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
-			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
-			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
-			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
-			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
-			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
-			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
-			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
-			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
-			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
-			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
-			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
-			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
-			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
-			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
-			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
-			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.input	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
+			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
+			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
+			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
+			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
+			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
+			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
+			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
+			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
+			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
+			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
+			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
+			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
+			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
+			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
+			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
+			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
+			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
+			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
+			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
+			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
+			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
+			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
+			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
+			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
+			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
+			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
+			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
+			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
+			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
+			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
+			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
+			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
+			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
+			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
+			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
+			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
+			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
+			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
+			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
+			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
+			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
+			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
+			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
+			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
+			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
+			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
+			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
+			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
+			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
+			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
+			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
+			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
+			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
+			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
+			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
+			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
+			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
+			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
+			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
+			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
+			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
+			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
 		.ilen	= 512,
 		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
@@ -14320,7 +16196,7 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
-	},
+	}
 };
 
 /* Cast6 test vectors from RFC 2612 */
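The new Speck XTS vectors above reuse the usual testmgr scatter/gather fields: .np and .tap appear to describe how the 512-byte buffer is split across scatterlist segments for the chunked run, while .also_non_np additionally exercises the vector as one contiguous buffer. As a quick sanity check of the arithmetic only (plain C; the field interpretation is an assumption, not something stated in this patch):

#include <assert.h>

int main(void)
{
	unsigned int rlen = 512;
	unsigned int tap[3] = { 512 - 20, 4, 16 };	/* .np = 3 segments */

	/* the segment lengths must add up to the full result length */
	assert(tap[0] + tap[1] + tap[2] == rlen);	/* 492 + 4 + 16 == 512 */
	return 0;
}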

+ 0 - 72
crypto/xts.c

@@ -357,78 +357,6 @@ static int decrypt(struct skcipher_request *req)
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
-int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
-	      struct scatterlist *ssrc, unsigned int nbytes,
-	      struct xts_crypt_req *req)
-{
-	const unsigned int bsize = XTS_BLOCK_SIZE;
-	const unsigned int max_blks = req->tbuflen / bsize;
-	struct blkcipher_walk walk;
-	unsigned int nblocks;
-	le128 *src, *dst, *t;
-	le128 *t_buf = req->tbuf;
-	int err, i;
-
-	BUG_ON(max_blks < 1);
-
-	blkcipher_walk_init(&walk, sdst, ssrc, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	nblocks = min(nbytes / bsize, max_blks);
-	src = (le128 *)walk.src.virt.addr;
-	dst = (le128 *)walk.dst.virt.addr;
-
-	/* calculate first value of T */
-	req->tweak_fn(req->tweak_ctx, (u8 *)&t_buf[0], walk.iv);
-
-	i = 0;
-	goto first;
-
-	for (;;) {
-		do {
-			for (i = 0; i < nblocks; i++) {
-				gf128mul_x_ble(&t_buf[i], t);
-first:
-				t = &t_buf[i];
-
-				/* PP <- T xor P */
-				le128_xor(dst + i, t, src + i);
-			}
-
-			/* CC <- E(Key2,PP) */
-			req->crypt_fn(req->crypt_ctx, (u8 *)dst,
-				      nblocks * bsize);
-
-			/* C <- T xor CC */
-			for (i = 0; i < nblocks; i++)
-				le128_xor(dst + i, dst + i, &t_buf[i]);
-
-			src += nblocks;
-			dst += nblocks;
-			nbytes -= nblocks * bsize;
-			nblocks = min(nbytes / bsize, max_blks);
-		} while (nblocks > 0);
-
-		*(le128 *)walk.iv = *t;
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-		if (!nbytes)
-			break;
-
-		nblocks = min(nbytes / bsize, max_blks);
-		src = (le128 *)walk.src.virt.addr;
-		dst = (le128 *)walk.dst.virt.addr;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(xts_crypt);
-
 static int init_tfm(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
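The deleted xts_crypt() helper drove the legacy blkcipher walk interface; its inline comments spell out the XTS recipe (PP <- T xor P, CC <- E(Key2, PP), C <- T xor CC, with the tweak T advanced by multiplying by x in GF(2^128) via gf128mul_x_ble). A minimal standalone sketch of that per-block step, with a stand-in 16-byte block cipher and the tweak kept as little-endian bytes, might look like this (illustrative only, not the kernel implementation):

#include <stdint.h>

/* stand-in for E(Key2, .): any 16-byte block cipher fits here */
typedef void (*block_fn)(uint8_t out[16], const uint8_t in[16]);

/* multiply the little-endian 128-bit tweak by x (the "ble" convention) */
static void gf128mul_x_ble(uint8_t t[16])
{
	uint8_t carry = t[15] >> 7;		/* bit shifted off the top */
	int i;

	for (i = 15; i > 0; i--)
		t[i] = (uint8_t)((t[i] << 1) | (t[i - 1] >> 7));
	t[0] <<= 1;
	if (carry)
		t[0] ^= 0x87;			/* reduce by x^128 + x^7 + x^2 + x + 1 */
}

/* one XTS block: C = T xor E(T xor P), then advance T for the next block */
static void xts_one_block(uint8_t c[16], const uint8_t p[16],
			  uint8_t t[16], block_fn encrypt)
{
	uint8_t pp[16];
	int i;

	for (i = 0; i < 16; i++)
		pp[i] = p[i] ^ t[i];		/* PP <- T xor P  */
	encrypt(c, pp);				/* CC <- E(PP)    */
	for (i = 0; i < 16; i++)
		c[i] ^= t[i];			/* C  <- T xor CC */
	gf128mul_x_ble(t);			/* next tweak     */
}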

+ 7 - 0
drivers/char/hw_random/Kconfig

@@ -452,3 +452,10 @@ config UML_RANDOM
 	  (check your distro, or download from
 	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
 	  /dev/hwrng and injects the entropy into /dev/random.
+
+config HW_RANDOM_KEYSTONE
+	depends on ARCH_KEYSTONE
+	default HW_RANDOM
+	tristate "TI Keystone NETCP SA Hardware random number generator"
+	help
+	  This option enables Keystone's hardware random number generator.

+ 1 - 0
drivers/char/hw_random/Makefile

@@ -38,3 +38,4 @@ obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
 obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
 obj-$(CONFIG_HW_RANDOM_MTK)	+= mtk-rng.o
 obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
+obj-$(CONFIG_HW_RANDOM_KEYSTONE) += ks-sa-rng.o

+ 2 - 0
drivers/char/hw_random/bcm2835-rng.c

@@ -163,6 +163,8 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 
 	/* Clock is optional on most platforms */
 	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
 
 	priv->rng.name = pdev->name;
 	priv->rng.init = bcm2835_rng_init;

+ 1 - 1
drivers/char/hw_random/cavium-rng-vf.c

@@ -77,7 +77,7 @@ static int cavium_rng_probe_vf(struct	pci_dev		*pdev,
 }
 
 /* Remove the VF */
-void  cavium_rng_remove_vf(struct pci_dev *pdev)
+static void  cavium_rng_remove_vf(struct pci_dev *pdev)
 {
 	struct cavium_rng *rng;
 

+ 1 - 1
drivers/char/hw_random/cavium-rng.c

@@ -62,7 +62,7 @@ static int cavium_rng_probe(struct pci_dev *pdev,
 }
 
 /* Disable VF and RNG Hardware */
-void  cavium_rng_remove(struct pci_dev *pdev)
+static void cavium_rng_remove(struct pci_dev *pdev)
 {
 	struct cavium_rng_pf *rng;
 

+ 1 - 1
drivers/char/hw_random/imx-rngc.c

@@ -300,7 +300,7 @@ static int __maybe_unused imx_rngc_resume(struct device *dev)
 	return 0;
 }
 
-SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
+static SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
 
 static const struct of_device_id imx_rngc_dt_ids[] = {
 	{ .compatible = "fsl,imx25-rngb", .data = NULL, },

+ 257 - 0
drivers/char/hw_random/ks-sa-rng.c

@@ -0,0 +1,257 @@
+/*
+ * Random Number Generator driver for the Keystone SOC
+ *
+ * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors:	Sandeep Nair
+ *		Vitaly Andrianov
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+
+#define SA_CMD_STATUS_OFS			0x8
+
+/* TRNG enable control in SA System module */
+#define SA_CMD_STATUS_REG_TRNG_ENABLE		BIT(3)
+
+/* TRNG start control in TRNG module */
+#define TRNG_CNTL_REG_TRNG_ENABLE		BIT(10)
+
+/* Data ready indicator in STATUS register */
+#define TRNG_STATUS_REG_READY			BIT(0)
+
+/* Data ready clear control in INTACK register */
+#define TRNG_INTACK_REG_READY			BIT(0)
+
+/*
+ * Number of samples taken to gather entropy during startup.
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^8.
+ */
+#define TRNG_DEF_STARTUP_CYCLES			0
+#define TRNG_CNTL_REG_STARTUP_CYCLES_SHIFT	16
+
+/*
+ * Minimum number of samples taken to regenerate entropy
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^6.
+ */
+#define TRNG_DEF_MIN_REFILL_CYCLES		1
+#define TRNG_CFG_REG_MIN_REFILL_CYCLES_SHIFT	0
+
+/*
+ * Maximum number of samples taken to regenerate entropy
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^8.
+ */
+#define TRNG_DEF_MAX_REFILL_CYCLES		0
+#define TRNG_CFG_REG_MAX_REFILL_CYCLES_SHIFT	16
+
+/* Number of CLK input cycles between samples */
+#define TRNG_DEF_CLK_DIV_CYCLES			0
+#define TRNG_CFG_REG_SAMPLE_DIV_SHIFT		8
+
+/* Maximum retries to get rng data */
+#define SA_MAX_RNG_DATA_RETRIES			5
+/* Delay between retries (in usecs) */
+#define SA_RNG_DATA_RETRY_DELAY			5
+
+struct trng_regs {
+	u32	output_l;
+	u32	output_h;
+	u32	status;
+	u32	intmask;
+	u32	intack;
+	u32	control;
+	u32	config;
+};
+
+struct ks_sa_rng {
+	struct device	*dev;
+	struct hwrng	rng;
+	struct clk	*clk;
+	struct regmap	*regmap_cfg;
+	struct trng_regs *reg_rng;
+};
+
+static int ks_sa_rng_init(struct hwrng *rng)
+{
+	u32 value;
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Enable RNG module */
+	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE);
+
+	/* Configure RNG module */
+	writel(0, &ks_sa_rng->reg_rng->control);
+	value = TRNG_DEF_STARTUP_CYCLES << TRNG_CNTL_REG_STARTUP_CYCLES_SHIFT;
+	writel(value, &ks_sa_rng->reg_rng->control);
+
+	value =	(TRNG_DEF_MIN_REFILL_CYCLES <<
+		 TRNG_CFG_REG_MIN_REFILL_CYCLES_SHIFT) |
+		(TRNG_DEF_MAX_REFILL_CYCLES <<
+		 TRNG_CFG_REG_MAX_REFILL_CYCLES_SHIFT) |
+		(TRNG_DEF_CLK_DIV_CYCLES <<
+		 TRNG_CFG_REG_SAMPLE_DIV_SHIFT);
+
+	writel(value, &ks_sa_rng->reg_rng->config);
+
+	/* Disable all interrupts from TRNG */
+	writel(0, &ks_sa_rng->reg_rng->intmask);
+
+	/* Enable RNG */
+	value = readl(&ks_sa_rng->reg_rng->control);
+	value |= TRNG_CNTL_REG_TRNG_ENABLE;
+	writel(value, &ks_sa_rng->reg_rng->control);
+
+	return 0;
+}
+
+static void ks_sa_rng_cleanup(struct hwrng *rng)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Disable RNG */
+	writel(0, &ks_sa_rng->reg_rng->control);
+	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE, 0);
+}
+
+static int ks_sa_rng_data_read(struct hwrng *rng, u32 *data)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Read random data */
+	data[0] = readl(&ks_sa_rng->reg_rng->output_l);
+	data[1] = readl(&ks_sa_rng->reg_rng->output_h);
+
+	writel(TRNG_INTACK_REG_READY, &ks_sa_rng->reg_rng->intack);
+
+	return sizeof(u32) * 2;
+}
+
+static int ks_sa_rng_data_present(struct hwrng *rng, int wait)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	u32	ready;
+	int	j;
+
+	for (j = 0; j < SA_MAX_RNG_DATA_RETRIES; j++) {
+		ready = readl(&ks_sa_rng->reg_rng->status);
+		ready &= TRNG_STATUS_REG_READY;
+
+		if (ready || !wait)
+			break;
+
+		udelay(SA_RNG_DATA_RETRY_DELAY);
+	}
+
+	return ready;
+}
+
+static int ks_sa_rng_probe(struct platform_device *pdev)
+{
+	struct ks_sa_rng	*ks_sa_rng;
+	struct device		*dev = &pdev->dev;
+	int			ret;
+	struct resource		*mem;
+
+	ks_sa_rng = devm_kzalloc(dev, sizeof(*ks_sa_rng), GFP_KERNEL);
+	if (!ks_sa_rng)
+		return -ENOMEM;
+
+	ks_sa_rng->dev = dev;
+	ks_sa_rng->rng = (struct hwrng) {
+		.name = "ks_sa_hwrng",
+		.init = ks_sa_rng_init,
+		.data_read = ks_sa_rng_data_read,
+		.data_present = ks_sa_rng_data_present,
+		.cleanup = ks_sa_rng_cleanup,
+	};
+	ks_sa_rng->rng.priv = (unsigned long)dev;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ks_sa_rng->reg_rng = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(ks_sa_rng->reg_rng))
+		return PTR_ERR(ks_sa_rng->reg_rng);
+
+	ks_sa_rng->regmap_cfg =
+		syscon_regmap_lookup_by_phandle(dev->of_node,
+						"ti,syscon-sa-cfg");
+
+	if (IS_ERR(ks_sa_rng->regmap_cfg)) {
+		dev_err(dev, "syscon_node_to_regmap failed\n");
+		return -EINVAL;
+	}
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable SA power-domain\n");
+		pm_runtime_disable(dev);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, ks_sa_rng);
+
+	return devm_hwrng_register(&pdev->dev, &ks_sa_rng->rng);
+}
+
+static int ks_sa_rng_remove(struct platform_device *pdev)
+{
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id ks_sa_rng_dt_match[] = {
+	{
+		.compatible = "ti,keystone-rng",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ks_sa_rng_dt_match);
+
+static struct platform_driver ks_sa_rng_driver = {
+	.driver		= {
+		.name	= "ks-sa-rng",
+		.of_match_table = ks_sa_rng_dt_match,
+	},
+	.probe		= ks_sa_rng_probe,
+	.remove		= ks_sa_rng_remove,
+};
+
+module_platform_driver(ks_sa_rng_driver);
+
+MODULE_DESCRIPTION("Keystone NETCP SA H/W Random Number Generator driver");
+MODULE_AUTHOR("Vitaly Andrianov <vitalya@ti.com>");
+MODULE_LICENSE("GPL");
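The configuration comments in this new driver encode the sampling parameters indirectly: a field value of 0 means 2^24 samples, otherwise the effective count is the field scaled by 2^8 (startup and maximum refill) or 2^6 (minimum refill). A throwaway helper making that explicit (plain C, mirroring only what the comments state):

#include <stdint.h>

/* 0 means 2^24 samples; otherwise the field is scaled by 2^shift
 * (shift is 8 for startup/max refill, 6 for min refill). */
static uint32_t trng_effective_samples(uint32_t field, unsigned int shift)
{
	return field ? field << shift : (uint32_t)1 << 24;
}

/* e.g. TRNG_DEF_MIN_REFILL_CYCLES == 1  ->  1 << 6 == 64 samples,
 *      TRNG_DEF_STARTUP_CYCLES    == 0  ->  2^24 samples at startup */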

+ 14 - 9
drivers/char/hw_random/mxc-rnga.c

@@ -16,16 +16,13 @@
  * This driver is based on other RNG drivers.
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
 #include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-#include <linux/hw_random.h>
 #include <linux/delay.h>
+#include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 /* RNGA Registers */
 #define RNGA_CONTROL			0x00
@@ -197,10 +194,18 @@ static int __exit mxc_rnga_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id mxc_rnga_of_match[] = {
+	{ .compatible = "fsl,imx21-rnga", },
+	{ .compatible = "fsl,imx31-rnga", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mxc_rnga_of_match);
+
 static struct platform_driver mxc_rnga_driver = {
 	.driver = {
-		   .name = "mxc_rnga",
-		   },
+		.name = "mxc_rnga",
+		.of_match_table = mxc_rnga_of_match,
+	},
 	.remove = __exit_p(mxc_rnga_remove),
 };
 

+ 18 - 4
drivers/char/hw_random/omap-rng.c

@@ -150,6 +150,7 @@ struct omap_rng_dev {
 	const struct omap_rng_pdata	*pdata;
 	struct hwrng rng;
 	struct clk 			*clk;
+	struct clk			*clk_reg;
 };
 
 static inline u32 omap_rng_read(struct omap_rng_dev *priv, u16 reg)
@@ -480,6 +481,19 @@ static int omap_rng_probe(struct platform_device *pdev)
 		}
 	}
 
+	priv->clk_reg = devm_clk_get(&pdev->dev, "reg");
+	if (IS_ERR(priv->clk_reg) && PTR_ERR(priv->clk_reg) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+	if (!IS_ERR(priv->clk_reg)) {
+		ret = clk_prepare_enable(priv->clk_reg);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Unable to enable the register clk: %d\n",
+				ret);
+			goto err_register;
+		}
+	}
+
 	ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) :
 				get_omap_rng_device_details(priv);
 	if (ret)
@@ -499,8 +513,8 @@ err_register:
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_reg);
+	clk_disable_unprepare(priv->clk);
 err_ioremap:
 	dev_err(dev, "initialization failed.\n");
 	return ret;
@@ -517,8 +531,8 @@ static int omap_rng_remove(struct platform_device *pdev)
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_reg);
 
 	return 0;
 }

+ 28 - 16
drivers/char/hw_random/stm32-rng.c

@@ -16,15 +16,18 @@
 #include <linux/delay.h>
 #include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/pm_runtime.h>
+#include <linux/reset.h>
 #include <linux/slab.h>
 
 #define RNG_CR 0x00
 #define RNG_CR_RNGEN BIT(2)
+#define RNG_CR_CED BIT(5)
 
 #define RNG_SR 0x04
 #define RNG_SR_SEIS BIT(6)
@@ -33,19 +36,12 @@
 
 #define RNG_DR 0x08
 
-/*
- * It takes 40 cycles @ 48MHz to generate each random number (e.g. <1us).
- * At the time of writing STM32 parts max out at ~200MHz meaning a timeout
- * of 500 leaves us a very comfortable margin for error. The loop to which
- * the timeout applies takes at least 4 instructions per iteration so the
- * timeout is enough to take us up to multi-GHz parts!
- */
-#define RNG_TIMEOUT 500
-
 struct stm32_rng_private {
 	struct hwrng rng;
 	void __iomem *base;
 	struct clk *clk;
+	struct reset_control *rst;
+	bool ced;
 };
 
 static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
@@ -59,13 +55,16 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 
 	while (max > sizeof(u32)) {
 		sr = readl_relaxed(priv->base + RNG_SR);
+		/* Manage the timeout, which is timer based and takes */
+		/* care of the initial delay when enabling the RNG. */
 		if (!sr && wait) {
-			unsigned int timeout = RNG_TIMEOUT;
-
-			do {
-				cpu_relax();
-				sr = readl_relaxed(priv->base + RNG_SR);
-			} while (!sr && --timeout);
+			retval = readl_relaxed_poll_timeout_atomic(priv->base
+								   + RNG_SR,
+								   sr, sr,
+								   10, 50000);
+			if (retval)
+				dev_err((struct device *)priv->rng.priv,
+					"%s: timeout %x!\n", __func__, sr);
 		}
 
 		/* If error detected or data not ready... */
@@ -99,7 +98,11 @@ static int stm32_rng_init(struct hwrng *rng)
 	if (err)
 		return err;
 
-	writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+	if (priv->ced)
+		writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+	else
+		writel_relaxed(RNG_CR_RNGEN | RNG_CR_CED,
+			       priv->base + RNG_CR);
 
 	/* clear error indicators */
 	writel_relaxed(0, priv->base + RNG_SR);
@@ -140,6 +143,15 @@ static int stm32_rng_probe(struct platform_device *ofdev)
 	if (IS_ERR(priv->clk))
 		return PTR_ERR(priv->clk);
 
+	priv->rst = devm_reset_control_get(&ofdev->dev, NULL);
+	if (!IS_ERR(priv->rst)) {
+		reset_control_assert(priv->rst);
+		udelay(2);
+		reset_control_deassert(priv->rst);
+	}
+
+	priv->ced = of_property_read_bool(np, "clock-error-detect");
+
 	dev_set_drvdata(dev, priv);
 
 	priv->rng.name = dev_driver_string(dev),
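The stm32 read path above trades a fixed iteration count for a time-bounded poll: readl_relaxed_poll_timeout_atomic() re-reads the status register roughly every 10 µs and gives up after 50 ms, which also covers the initial delay after enabling the RNG. Outside the kernel the same pattern reads roughly like this (a standalone sketch of the idea, not the iopoll implementation):

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

/* Re-check ready() about every delay_us microseconds; fail after timeout_us. */
static int poll_until_ready(bool (*ready)(void),
			    unsigned int delay_us, unsigned int timeout_us)
{
	struct timespec start, now;
	struct timespec nap = { 0, (long)delay_us * 1000 };

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		int64_t elapsed_us;

		if (ready())
			return 0;

		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_us = (int64_t)(now.tv_sec - start.tv_sec) * 1000000 +
			     (now.tv_nsec - start.tv_nsec) / 1000;
		if (elapsed_us >= (int64_t)timeout_us)
			return -1;		/* timed out */

		nanosleep(&nap, NULL);
	}
}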

+ 27 - 7
drivers/crypto/Kconfig

@@ -464,13 +464,6 @@ if CRYPTO_DEV_UX500
 	source "drivers/crypto/ux500/Kconfig"
 endif # if CRYPTO_DEV_UX500
 
-config CRYPTO_DEV_BFIN_CRC
-	tristate "Support for Blackfin CRC hardware"
-	depends on BF60x
-	help
-	  Newer Blackfin processors have CRC hardware. Select this if you
-	  want to use the Blackfin CRC module.
-
 config CRYPTO_DEV_ATMEL_AUTHENC
 	tristate "Support for Atmel IPSEC/SSL hw accelerator"
 	depends on HAS_DMA
@@ -730,4 +723,31 @@ config CRYPTO_DEV_ARTPEC6
 
 	  To compile this driver as a module, choose M here.
 
+config CRYPTO_DEV_CCREE
+	tristate "Support for ARM TrustZone CryptoCell family of security processors"
+	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
+	default n
+	select CRYPTO_HASH
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_DES
+	select CRYPTO_AEAD
+	select CRYPTO_AUTHENC
+	select CRYPTO_SHA1
+	select CRYPTO_MD5
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	select CRYPTO_HMAC
+	select CRYPTO_AES
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_CTR
+	select CRYPTO_XTS
+	help
+	  Say 'Y' to enable a driver for the REE interface of the Arm
+	  TrustZone CryptoCell family of processors. Currently the
+	  CryptoCell 712, 710 and 630 are supported.
+	  Choose this if you wish to use hardware acceleration of
+	  cryptographic operations on the system REE.
+	  If unsure say Y.
+
 endif # CRYPTO_HW

+ 1 - 1
drivers/crypto/Makefile

@@ -3,9 +3,9 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
-obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
 obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
+obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
 obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
 obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/

+ 2 - 6
drivers/crypto/atmel-aes.c

@@ -2155,7 +2155,7 @@ static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key,
 
 badkey:
 	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	memzero_explicit(&key, sizeof(keys));
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
@@ -2602,16 +2602,13 @@ static struct crypto_platform_data *atmel_aes_of_init(struct platform_device *pd
 	}
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
 	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
 		devm_kfree(&pdev->dev, pdata);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -2649,7 +2646,6 @@ static int atmel_aes_probe(struct platform_device *pdev)
 
 	aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL);
 	if (aes_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto aes_dd_err;
 	}
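The one-character setkey fix in this file (memzero_explicit(&key, ...) becoming memzero_explicit(&keys, ...)) is the classic wrong-object wipe: the error path was zeroing sizeof(keys) bytes starting at the local pointer variable instead of the structure holding the parsed key material. In miniature (plain C, names made up for the example; memset stands in for memzero_explicit, which the kernel uses so the store cannot be optimised away):

#include <string.h>

struct parsed_keys {
	unsigned char enckey[32];
	unsigned char authkey[32];
};

static void setkey_error_path(const unsigned char *key)
{
	struct parsed_keys keys;

	/* ... suppose parsing 'key' into 'keys' failed ... */

	/* Buggy: wipes sizeof(keys) bytes starting at the pointer 'key',
	 * i.e. the wrong object and far beyond its few bytes of storage:
	 *
	 *	memset(&key, 0, sizeof(keys));
	 */

	/* Intended: wipe the structure that actually holds key material. */
	memset(&keys, 0, sizeof(keys));
	(void)key;
}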

+ 2 - 7
drivers/crypto/atmel-sha.c

@@ -2726,18 +2726,14 @@ static struct crypto_platform_data *atmel_sha_of_init(struct platform_device *pd
 	}
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
+	if (!pdata->dma_slave)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	return pdata;
 }
@@ -2758,7 +2754,6 @@ static int atmel_sha_probe(struct platform_device *pdev)
 
 	sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL);
 	if (sha_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto sha_dd_err;
 	}

+ 2 - 7
drivers/crypto/atmel-tdes.c

@@ -1312,18 +1312,14 @@ static struct crypto_platform_data *atmel_tdes_of_init(struct platform_device *p
 	}
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
+	if (!pdata->dma_slave)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	return pdata;
 }
@@ -1344,7 +1340,6 @@ static int atmel_tdes_probe(struct platform_device *pdev)
 
 	tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL);
 	if (tdes_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto tdes_dd_err;
 	}

+ 2 - 2
drivers/crypto/bcm/cipher.c

@@ -818,7 +818,7 @@ static int handle_ahash_req(struct iproc_reqctx_s *rctx)
 
 	/* AES hashing keeps key size in type field, so need to copy it here */
 	if (hash_parms.alg == HASH_ALG_AES)
-		hash_parms.type = cipher_parms.type;
+		hash_parms.type = (enum hash_type)cipher_parms.type;
 	else
 		hash_parms.type = spu->spu_hash_type(rctx->total_sent);
 
@@ -1409,7 +1409,7 @@ static int handle_aead_req(struct iproc_reqctx_s *rctx)
 						rctx->iv_ctr_len);
 
 	if (ctx->auth.alg == HASH_ALG_AES)
-		hash_parms.type = ctx->cipher_type;
+		hash_parms.type = (enum hash_type)ctx->cipher_type;
 
 	/* General case AAD padding (CCM and RFC4543 special cases below) */
 	aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,

+ 0 - 1
drivers/crypto/bcm/util.c

@@ -279,7 +279,6 @@ int do_shash(unsigned char *name, unsigned char *result,
 	sdesc = kmalloc(size, GFP_KERNEL);
 	if (!sdesc) {
 		rc = -ENOMEM;
-		pr_err("%s: Memory allocation failure\n", __func__);
 		goto do_shash_err;
 	}
 	sdesc->shash.tfm = hash;

+ 0 - 743
drivers/crypto/bfin_crc.c

@@ -1,743 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Support Blackfin CRC HW acceleration.
- *
- * Copyright 2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#include <linux/err.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/delay.h>
-#include <linux/crypto.h>
-#include <linux/cryptohash.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/algapi.h>
-#include <crypto/hash.h>
-#include <crypto/internal/hash.h>
-#include <asm/unaligned.h>
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-#include <asm/io.h>
-
-#include "bfin_crc.h"
-
-#define CRC_CCRYPTO_QUEUE_LENGTH	5
-
-#define DRIVER_NAME "bfin-hmac-crc"
-#define CHKSUM_DIGEST_SIZE      4
-#define CHKSUM_BLOCK_SIZE       1
-
-#define CRC_MAX_DMA_DESC	100
-
-#define CRC_CRYPTO_STATE_UPDATE		1
-#define CRC_CRYPTO_STATE_FINALUPDATE	2
-#define CRC_CRYPTO_STATE_FINISH		3
-
-struct bfin_crypto_crc {
-	struct list_head	list;
-	struct device		*dev;
-	spinlock_t		lock;
-
-	int			irq;
-	int			dma_ch;
-	u32			poly;
-	struct crc_register	*regs;
-
-	struct ahash_request	*req; /* current request in operation */
-	struct dma_desc_array	*sg_cpu; /* virt addr of sg dma descriptors */
-	dma_addr_t		sg_dma; /* phy addr of sg dma descriptors */
-	u8			*sg_mid_buf;
-	dma_addr_t		sg_mid_dma; /* phy addr of sg mid buffer */
-
-	struct tasklet_struct	done_task;
-	struct crypto_queue	queue; /* waiting requests */
-
-	u8			busy:1; /* crc device in operation flag */
-};
-
-static struct bfin_crypto_crc_list {
-	struct list_head	dev_list;
-	spinlock_t		lock;
-} crc_list;
-
-struct bfin_crypto_crc_reqctx {
-	struct bfin_crypto_crc	*crc;
-
-	unsigned int		total;	/* total request bytes */
-	size_t			sg_buflen; /* bytes for this update */
-	unsigned int		sg_nents;
-	struct scatterlist	*sg; /* sg list head for this update*/
-	struct scatterlist	bufsl[2]; /* chained sg list */
-
-	size_t			bufnext_len;
-	size_t			buflast_len;
-	u8			bufnext[CHKSUM_DIGEST_SIZE]; /* extra bytes for next udpate */
-	u8			buflast[CHKSUM_DIGEST_SIZE]; /* extra bytes from last udpate */
-
-	u8			flag;
-};
-
-struct bfin_crypto_crc_ctx {
-	struct bfin_crypto_crc	*crc;
-	u32			key;
-};
-
-/*
- * get element in scatter list by given index
- */
-static struct scatterlist *sg_get(struct scatterlist *sg_list, unsigned int nents,
-				unsigned int index)
-{
-	struct scatterlist *sg = NULL;
-	int i;
-
-	for_each_sg(sg_list, sg, nents, i)
-		if (i == index)
-			break;
-
-	return sg;
-}
-
-static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key)
-{
-	writel(0, &crc->regs->datacntrld);
-	writel(MODE_CALC_CRC << OPMODE_OFFSET, &crc->regs->control);
-	writel(key, &crc->regs->curresult);
-
-	/* setup CRC interrupts */
-	writel(CMPERRI | DCNTEXPI, &crc->regs->status);
-	writel(CMPERRI | DCNTEXPI, &crc->regs->intrenset);
-
-	return 0;
-}
-
-static int bfin_crypto_crc_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-	struct bfin_crypto_crc *crc;
-
-	dev_dbg(ctx->crc->dev, "crc_init\n");
-	spin_lock_bh(&crc_list.lock);
-	list_for_each_entry(crc, &crc_list.dev_list, list) {
-		crc_ctx->crc = crc;
-		break;
-	}
-	spin_unlock_bh(&crc_list.lock);
-
-	if (sg_nents(req->src) > CRC_MAX_DMA_DESC) {
-		dev_dbg(ctx->crc->dev, "init: requested sg list is too big > %d\n",
-			CRC_MAX_DMA_DESC);
-		return -EINVAL;
-	}
-
-	ctx->crc = crc;
-	ctx->bufnext_len = 0;
-	ctx->buflast_len = 0;
-	ctx->sg_buflen = 0;
-	ctx->total = 0;
-	ctx->flag = 0;
-
-	/* init crc results */
-	put_unaligned_le32(crc_ctx->key, req->result);
-
-	dev_dbg(ctx->crc->dev, "init: digest size: %d\n",
-		crypto_ahash_digestsize(tfm));
-
-	return bfin_crypto_crc_init_hw(crc, crc_ctx->key);
-}
-
-static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc)
-{
-	struct scatterlist *sg;
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(crc->req);
-	int i = 0, j = 0;
-	unsigned long dma_config;
-	unsigned int dma_count;
-	unsigned int dma_addr;
-	unsigned int mid_dma_count = 0;
-	int dma_mod;
-
-	dma_map_sg(crc->dev, ctx->sg, ctx->sg_nents, DMA_TO_DEVICE);
-
-	for_each_sg(ctx->sg, sg, ctx->sg_nents, j) {
-		dma_addr = sg_dma_address(sg);
-		/* deduce extra bytes in last sg */
-		if (sg_is_last(sg))
-			dma_count = sg_dma_len(sg) - ctx->bufnext_len;
-		else
-			dma_count = sg_dma_len(sg);
-
-		if (mid_dma_count) {
-			/* Append last middle dma buffer to 4 bytes with first
-			   bytes in current sg buffer. Move addr of current
-			   sg and deduce the length of current sg.
-			 */
-			memcpy(crc->sg_mid_buf +(i << 2) + mid_dma_count,
-				sg_virt(sg),
-				CHKSUM_DIGEST_SIZE - mid_dma_count);
-			dma_addr += CHKSUM_DIGEST_SIZE - mid_dma_count;
-			dma_count -= CHKSUM_DIGEST_SIZE - mid_dma_count;
-
-			dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 |
-				DMAEN | PSIZE_32 | WDSIZE_32;
-
-			/* setup new dma descriptor for next middle dma */
-			crc->sg_cpu[i].start_addr = crc->sg_mid_dma + (i << 2);
-			crc->sg_cpu[i].cfg = dma_config;
-			crc->sg_cpu[i].x_count = 1;
-			crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
-			dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-				"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-				i, crc->sg_cpu[i].start_addr,
-				crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-				crc->sg_cpu[i].x_modify);
-			i++;
-		}
-
-		dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32;
-		/* chop current sg dma len to multiple of 32 bits */
-		mid_dma_count = dma_count % 4;
-		dma_count &= ~0x3;
-
-		if (dma_addr % 4 == 0) {
-			dma_config |= WDSIZE_32;
-			dma_count >>= 2;
-			dma_mod = 4;
-		} else if (dma_addr % 2 == 0) {
-			dma_config |= WDSIZE_16;
-			dma_count >>= 1;
-			dma_mod = 2;
-		} else {
-			dma_config |= WDSIZE_8;
-			dma_mod = 1;
-		}
-
-		crc->sg_cpu[i].start_addr = dma_addr;
-		crc->sg_cpu[i].cfg = dma_config;
-		crc->sg_cpu[i].x_count = dma_count;
-		crc->sg_cpu[i].x_modify = dma_mod;
-		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-			"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-			i, crc->sg_cpu[i].start_addr,
-			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-			crc->sg_cpu[i].x_modify);
-		i++;
-
-		if (mid_dma_count) {
-			/* copy extra bytes to next middle dma buffer */
-			memcpy(crc->sg_mid_buf + (i << 2),
-				(u8*)sg_virt(sg) + (dma_count << 2),
-				mid_dma_count);
-		}
-	}
-
-	dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32 | WDSIZE_32;
-	/* For final update req, append the buffer for next update as well*/
-	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
-		ctx->flag == CRC_CRYPTO_STATE_FINISH)) {
-		crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, ctx->bufnext,
-						CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
-		crc->sg_cpu[i].cfg = dma_config;
-		crc->sg_cpu[i].x_count = 1;
-		crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
-		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-			"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-			i, crc->sg_cpu[i].start_addr,
-			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-			crc->sg_cpu[i].x_modify);
-		i++;
-	}
-
-	if (i == 0)
-		return;
-
-	/* Set the last descriptor to stop mode */
-	crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE);
-	crc->sg_cpu[i - 1].cfg |= DI_EN;
-	set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma);
-	set_dma_x_count(crc->dma_ch, 0);
-	set_dma_x_modify(crc->dma_ch, 0);
-	set_dma_config(crc->dma_ch, dma_config);
-}
-
-static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc,
-				  struct ahash_request *req)
-{
-	struct crypto_async_request *async_req, *backlog;
-	struct bfin_crypto_crc_reqctx *ctx;
-	struct scatterlist *sg;
-	int ret = 0;
-	int nsg, i, j;
-	unsigned int nextlen;
-	unsigned long flags;
-	u32 reg;
-
-	spin_lock_irqsave(&crc->lock, flags);
-	if (req)
-		ret = ahash_enqueue_request(&crc->queue, req);
-	if (crc->busy) {
-		spin_unlock_irqrestore(&crc->lock, flags);
-		return ret;
-	}
-	backlog = crypto_get_backlog(&crc->queue);
-	async_req = crypto_dequeue_request(&crc->queue);
-	if (async_req)
-		crc->busy = 1;
-	spin_unlock_irqrestore(&crc->lock, flags);
-
-	if (!async_req)
-		return ret;
-
-	if (backlog)
-		backlog->complete(backlog, -EINPROGRESS);
-
-	req = ahash_request_cast(async_req);
-	crc->req = req;
-	ctx = ahash_request_ctx(req);
-	ctx->sg = NULL;
-	ctx->sg_buflen = 0;
-	ctx->sg_nents = 0;
-
-	dev_dbg(crc->dev, "handling new req, flag=%u, nbytes: %d\n",
-						ctx->flag, req->nbytes);
-
-	if (ctx->flag == CRC_CRYPTO_STATE_FINISH) {
-		if (ctx->bufnext_len == 0) {
-			crc->busy = 0;
-			return 0;
-		}
-
-		/* Pack last crc update buffer to 32bit */
-		memset(ctx->bufnext + ctx->bufnext_len, 0,
-				CHKSUM_DIGEST_SIZE - ctx->bufnext_len);
-	} else {
-		/* Pack small data which is less than 32bit to buffer for next update. */
-		if (ctx->bufnext_len + req->nbytes < CHKSUM_DIGEST_SIZE) {
-			memcpy(ctx->bufnext + ctx->bufnext_len,
-				sg_virt(req->src), req->nbytes);
-			ctx->bufnext_len += req->nbytes;
-			if (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE &&
-				ctx->bufnext_len) {
-				goto finish_update;
-			} else {
-				crc->busy = 0;
-				return 0;
-			}
-		}
-
-		if (ctx->bufnext_len) {
-			/* Chain in extra bytes of last update */
-			ctx->buflast_len = ctx->bufnext_len;
-			memcpy(ctx->buflast, ctx->bufnext, ctx->buflast_len);
-
-			nsg = ctx->sg_buflen ? 2 : 1;
-			sg_init_table(ctx->bufsl, nsg);
-			sg_set_buf(ctx->bufsl, ctx->buflast, ctx->buflast_len);
-			if (nsg > 1)
-				sg_chain(ctx->bufsl, nsg, req->src);
-			ctx->sg = ctx->bufsl;
-		} else
-			ctx->sg = req->src;
-
-		/* Chop crc buffer size to multiple of 32 bit */
-		nsg = sg_nents(ctx->sg);
-		ctx->sg_nents = nsg;
-		ctx->sg_buflen = ctx->buflast_len + req->nbytes;
-		ctx->bufnext_len = ctx->sg_buflen % 4;
-		ctx->sg_buflen &= ~0x3;
-
-		if (ctx->bufnext_len) {
-			/* copy extra bytes to buffer for next update */
-			memset(ctx->bufnext, 0, CHKSUM_DIGEST_SIZE);
-			nextlen = ctx->bufnext_len;
-			for (i = nsg - 1; i >= 0; i--) {
-				sg = sg_get(ctx->sg, nsg, i);
-				j = min(nextlen, sg_dma_len(sg));
-				memcpy(ctx->bufnext + nextlen - j,
-					sg_virt(sg) + sg_dma_len(sg) - j, j);
-				if (j == sg_dma_len(sg))
-					ctx->sg_nents--;
-				nextlen -= j;
-				if (nextlen == 0)
-					break;
-			}
-		}
-	}
-
-finish_update:
-	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
-		ctx->flag == CRC_CRYPTO_STATE_FINISH))
-		ctx->sg_buflen += CHKSUM_DIGEST_SIZE;
-
-	/* set CRC data count before start DMA */
-	writel(ctx->sg_buflen >> 2, &crc->regs->datacnt);
-
-	/* setup and enable CRC DMA */
-	bfin_crypto_crc_config_dma(crc);
-
-	/* finally kick off CRC operation */
-	reg = readl(&crc->regs->control);
-	writel(reg | BLKEN, &crc->regs->control);
-
-	return -EINPROGRESS;
-}
-
-static int bfin_crypto_crc_update(struct ahash_request *req)
-{
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	if (!req->nbytes)
-		return 0;
-
-	dev_dbg(ctx->crc->dev, "crc_update\n");
-	ctx->total += req->nbytes;
-	ctx->flag = CRC_CRYPTO_STATE_UPDATE;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_final(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	dev_dbg(ctx->crc->dev, "crc_final\n");
-	ctx->flag = CRC_CRYPTO_STATE_FINISH;
-	crc_ctx->key = 0;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_finup(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	dev_dbg(ctx->crc->dev, "crc_finishupdate\n");
-	ctx->total += req->nbytes;
-	ctx->flag = CRC_CRYPTO_STATE_FINALUPDATE;
-	crc_ctx->key = 0;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_digest(struct ahash_request *req)
-{
-	int ret;
-
-	ret = bfin_crypto_crc_init(req);
-	if (ret)
-		return ret;
-
-	return bfin_crypto_crc_finup(req);
-}
-
-static int bfin_crypto_crc_setkey(struct crypto_ahash *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-
-	dev_dbg(crc_ctx->crc->dev, "crc_setkey\n");
-	if (keylen != CHKSUM_DIGEST_SIZE) {
-		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	crc_ctx->key = get_unaligned_le32(key);
-
-	return 0;
-}
-
-static int bfin_crypto_crc_cra_init(struct crypto_tfm *tfm)
-{
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_tfm_ctx(tfm);
-
-	crc_ctx->key = 0;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct bfin_crypto_crc_reqctx));
-
-	return 0;
-}
-
-static void bfin_crypto_crc_cra_exit(struct crypto_tfm *tfm)
-{
-}
-
-static struct ahash_alg algs = {
-	.init		= bfin_crypto_crc_init,
-	.update		= bfin_crypto_crc_update,
-	.final		= bfin_crypto_crc_final,
-	.finup		= bfin_crypto_crc_finup,
-	.digest		= bfin_crypto_crc_digest,
-	.setkey		= bfin_crypto_crc_setkey,
-	.halg.digestsize	= CHKSUM_DIGEST_SIZE,
-	.halg.base	= {
-		.cra_name		= "hmac(crc32)",
-		.cra_driver_name	= DRIVER_NAME,
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-						CRYPTO_ALG_ASYNC |
-						CRYPTO_ALG_OPTIONAL_KEY,
-		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct bfin_crypto_crc_ctx),
-		.cra_alignmask		= 3,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= bfin_crypto_crc_cra_init,
-		.cra_exit		= bfin_crypto_crc_cra_exit,
-	}
-};
-
-static void bfin_crypto_crc_done_task(unsigned long data)
-{
-	struct bfin_crypto_crc *crc = (struct bfin_crypto_crc *)data;
-
-	bfin_crypto_crc_handle_queue(crc, NULL);
-}
-
-static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id)
-{
-	struct bfin_crypto_crc *crc = dev_id;
-	u32 reg;
-
-	if (readl(&crc->regs->status) & DCNTEXP) {
-		writel(DCNTEXP, &crc->regs->status);
-
-		/* prepare results */
-		put_unaligned_le32(readl(&crc->regs->result),
-			crc->req->result);
-
-		reg = readl(&crc->regs->control);
-		writel(reg & ~BLKEN, &crc->regs->control);
-		crc->busy = 0;
-
-		if (crc->req->base.complete)
-			crc->req->base.complete(&crc->req->base, 0);
-
-		tasklet_schedule(&crc->done_task);
-
-		return IRQ_HANDLED;
-	} else
-		return IRQ_NONE;
-}
-
-#ifdef CONFIG_PM
-/**
- *	bfin_crypto_crc_suspend - suspend crc device
- *	@pdev: device being suspended
- *	@state: requested suspend state
- */
-static int bfin_crypto_crc_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
-	int i = 100000;
-
-	while ((readl(&crc->regs->control) & BLKEN) && --i)
-		cpu_relax();
-
-	if (i == 0)
-		return -EBUSY;
-
-	return 0;
-}
-#else
-# define bfin_crypto_crc_suspend NULL
-#endif
-
-#define bfin_crypto_crc_resume NULL
-
-/**
- *	bfin_crypto_crc_probe - Initialize module
- *
- */
-static int bfin_crypto_crc_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct resource *res;
-	struct bfin_crypto_crc *crc;
-	unsigned int timeout = 100000;
-	int ret;
-
-	crc = devm_kzalloc(dev, sizeof(*crc), GFP_KERNEL);
-	if (!crc) {
-		dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n");
-		return -ENOMEM;
-	}
-
-	crc->dev = dev;
-
-	INIT_LIST_HEAD(&crc->list);
-	spin_lock_init(&crc->lock);
-	tasklet_init(&crc->done_task, bfin_crypto_crc_done_task, (unsigned long)crc);
-	crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	crc->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR((void *)crc->regs)) {
-		dev_err(&pdev->dev, "Cannot map CRC IO\n");
-		return PTR_ERR((void *)crc->regs);
-	}
-
-	crc->irq = platform_get_irq(pdev, 0);
-	if (crc->irq < 0) {
-		dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n");
-		return -ENOENT;
-	}
-
-	ret = devm_request_irq(dev, crc->irq, bfin_crypto_crc_handler,
-			IRQF_SHARED, dev_name(dev), crc);
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to request blackfin crc irq\n");
-		return ret;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "No CRC DMA channel specified\n");
-		return -ENOENT;
-	}
-	crc->dma_ch = res->start;
-
-	ret = request_dma(crc->dma_ch, dev_name(dev));
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n");
-		return ret;
-	}
-
-	crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL);
-	if (crc->sg_cpu == NULL) {
-		ret = -ENOMEM;
-		goto out_error_dma;
-	}
-	/*
-	 * need at most CRC_MAX_DMA_DESC sg + CRC_MAX_DMA_DESC middle  +
-	 * 1 last + 1 next dma descriptors
-	 */
-	crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1));
-	crc->sg_mid_dma = crc->sg_dma + sizeof(struct dma_desc_array)
-			* ((CRC_MAX_DMA_DESC + 1) << 1);
-
-	writel(0, &crc->regs->control);
-	crc->poly = (u32)pdev->dev.platform_data;
-	writel(crc->poly, &crc->regs->poly);
-
-	while (!(readl(&crc->regs->status) & LUTDONE) && (--timeout) > 0)
-		cpu_relax();
-
-	if (timeout == 0)
-		dev_info(&pdev->dev, "init crc poly timeout\n");
-
-	platform_set_drvdata(pdev, crc);
-
-	spin_lock(&crc_list.lock);
-	list_add(&crc->list, &crc_list.dev_list);
-	spin_unlock(&crc_list.lock);
-
-	if (list_is_singular(&crc_list.dev_list)) {
-		ret = crypto_register_ahash(&algs);
-		if (ret) {
-			dev_err(&pdev->dev,
-				"Can't register crypto ahash device\n");
-			goto out_error_dma;
-		}
-	}
-
-	dev_info(&pdev->dev, "initialized\n");
-
-	return 0;
-
-out_error_dma:
-	if (crc->sg_cpu)
-		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
-	free_dma(crc->dma_ch);
-
-	return ret;
-}
-
-/**
- *	bfin_crypto_crc_remove - Initialize module
- *
- */
-static int bfin_crypto_crc_remove(struct platform_device *pdev)
-{
-	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
-
-	if (!crc)
-		return -ENODEV;
-
-	spin_lock(&crc_list.lock);
-	list_del(&crc->list);
-	spin_unlock(&crc_list.lock);
-
-	crypto_unregister_ahash(&algs);
-	tasklet_kill(&crc->done_task);
-	free_dma(crc->dma_ch);
-
-	return 0;
-}
-
-static struct platform_driver bfin_crypto_crc_driver = {
-	.probe     = bfin_crypto_crc_probe,
-	.remove    = bfin_crypto_crc_remove,
-	.suspend   = bfin_crypto_crc_suspend,
-	.resume    = bfin_crypto_crc_resume,
-	.driver    = {
-		.name  = DRIVER_NAME,
-	},
-};
-
-/**
- *	bfin_crypto_crc_mod_init - Initialize module
- *
- *	Checks the module params and registers the platform driver.
- *	Real work is in the platform probe function.
- */
-static int __init bfin_crypto_crc_mod_init(void)
-{
-	int ret;
-
-	pr_info("Blackfin hardware CRC crypto driver\n");
-
-	INIT_LIST_HEAD(&crc_list.dev_list);
-	spin_lock_init(&crc_list.lock);
-
-	ret = platform_driver_register(&bfin_crypto_crc_driver);
-	if (ret) {
-		pr_err("unable to register driver\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-/**
- *	bfin_crypto_crc_mod_exit - Deinitialize module
- */
-static void __exit bfin_crypto_crc_mod_exit(void)
-{
-	platform_driver_unregister(&bfin_crypto_crc_driver);
-}
-
-module_init(bfin_crypto_crc_mod_init);
-module_exit(bfin_crypto_crc_mod_exit);
-
-MODULE_AUTHOR("Sonic Zhang <sonic.zhang@analog.com>");
-MODULE_DESCRIPTION("Blackfin CRC hardware crypto driver");
-MODULE_LICENSE("GPL");

+ 0 - 124
drivers/crypto/bfin_crc.h

@@ -1,124 +0,0 @@
-/*
- * bfin_crc.h - interface to Blackfin CRC controllers
- *
- * Copyright 2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __BFIN_CRC_H__
-#define __BFIN_CRC_H__
-
-/* Function driver which use hardware crc must initialize the structure */
-struct crc_info {
-	/* Input data address */
-	unsigned char *in_addr;
-	/* Output data address */
-	unsigned char *out_addr;
-	/* Input or output bytes */
-	unsigned long datasize;
-	union {
-	/* CRC to compare with that of input buffer */
-	unsigned long crc_compare;
-	/* Value to compare with input data */
-	unsigned long val_verify;
-	/* Value to fill */
-	unsigned long val_fill;
-	};
-	/* Value to program the 32b CRC Polynomial */
-	unsigned long crc_poly;
-	union {
-	/* CRC calculated from the input data */
-	unsigned long crc_result;
-	/* First failed position to verify input data */
-	unsigned long pos_verify;
-	};
-	/* CRC mirror flags */
-	unsigned int bitmirr:1;
-	unsigned int bytmirr:1;
-	unsigned int w16swp:1;
-	unsigned int fdsel:1;
-	unsigned int rsltmirr:1;
-	unsigned int polymirr:1;
-	unsigned int cmpmirr:1;
-};
-
-/* Userspace interface */
-#define CRC_IOC_MAGIC		'C'
-#define CRC_IOC_CALC_CRC	_IOWR('C', 0x01, unsigned int)
-#define CRC_IOC_MEMCPY_CRC	_IOWR('C', 0x02, unsigned int)
-#define CRC_IOC_VERIFY_VAL	_IOWR('C', 0x03, unsigned int)
-#define CRC_IOC_FILL_VAL	_IOWR('C', 0x04, unsigned int)
-
-
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-
-struct crc_register {
-	u32 control;
-	u32 datacnt;
-	u32 datacntrld;
-	u32 __pad_1[2];
-	u32 compare;
-	u32 fillval;
-	u32 datafifo;
-	u32 intren;
-	u32 intrenset;
-	u32 intrenclr;
-	u32 poly;
-	u32 __pad_2[4];
-	u32 status;
-	u32 datacntcap;
-	u32 __pad_3;
-	u32 result;
-	u32 curresult;
-	u32 __pad_4[3];
-	u32 revid;
-};
-
-/* CRC_STATUS Masks */
-#define CMPERR			0x00000002	/* Compare error */
-#define DCNTEXP			0x00000010	/* datacnt register expired */
-#define IBR			0x00010000	/* Input buffer ready */
-#define OBR			0x00020000	/* Output buffer ready */
-#define IRR			0x00040000	/* Immediate result ready */
-#define LUTDONE			0x00080000	/* Look-up table generation done */
-#define FSTAT			0x00700000	/* FIFO status */
-#define MAX_FIFO		4		/* Max fifo size */
-
-/* CRC_CONTROL Masks */
-#define BLKEN			0x00000001	/* Block enable */
-#define OPMODE			0x000000F0	/* Operation mode */
-#define OPMODE_OFFSET		4		/* Operation mode mask offset */
-#define MODE_DMACPY_CRC		1		/* MTM CRC compute and compare */
-#define MODE_DATA_FILL		2		/* MTM data fill */
-#define MODE_CALC_CRC		3		/* MSM CRC compute and compare */
-#define MODE_DATA_VERIFY	4		/* MSM data verify */
-#define AUTOCLRZ		0x00000100	/* Auto clear to zero */
-#define AUTOCLRF		0x00000200	/* Auto clear to one */
-#define OBRSTALL		0x00001000	/* Stall on output buffer ready */
-#define IRRSTALL		0x00002000	/* Stall on immediate result ready */
-#define BITMIRR			0x00010000	/* Mirror bits within each byte of 32-bit input data */
-#define BITMIRR_OFFSET		16		/* Mirror bits offset */
-#define BYTMIRR			0x00020000	/* Mirror bytes of 32-bit input data */
-#define BYTMIRR_OFFSET		17		/* Mirror bytes offset */
-#define W16SWP			0x00040000	/* Mirror upper and lower 16-bit word of 32-bit input data */
-#define W16SWP_OFFSET		18		/* Mirror 16-bit word offset */
-#define FDSEL			0x00080000	/* FIFO is written after input data is mirrored */
-#define FDSEL_OFFSET		19		/* Mirror FIFO offset */
-#define RSLTMIRR		0x00100000	/* CRC result registers are mirrored. */
-#define RSLTMIRR_OFFSET		20		/* Mirror CRC result offset. */
-#define POLYMIRR		0x00200000	/* CRC poly register is mirrored. */
-#define POLYMIRR_OFFSET		21		/* Mirror CRC poly offset. */
-#define CMPMIRR			0x00400000	/* CRC compare register is mirrored. */
-#define CMPMIRR_OFFSET		22		/* Mirror CRC compare offset. */
-
-/* CRC_INTREN Masks */
-#define CMPERRI 		0x02		/* CRC_ERROR_INTR */
-#define DCNTEXPI 		0x10		/* CRC_STATUS_INTR */
-
-#endif
-
-#endif

+ 15 - 6
drivers/crypto/caam/caamalg.c

@@ -328,6 +328,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -349,7 +350,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ivsize, ctx->authsize, false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -366,7 +367,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ivsize, ctx->authsize, false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -387,6 +388,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -408,7 +410,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -425,7 +428,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -447,6 +451,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -468,7 +473,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -485,7 +491,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	}
 
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 
@@ -563,9 +570,11 @@ static int aead_setkey(struct crypto_aead *aead,
 
 skip_split_key:
 	ctx->cdata.keylen = keys.enckeylen;
+	memzero_explicit(&keys, sizeof(keys));
 	return aead_set_sh_desc(aead);
 badkey:
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 

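The caamalg.c hunk above does two things: it threads the AEAD IV size into the GCM/RFC4106/RFC4543 shared-descriptor constructors, and it wipes the stack copy of the parsed authenc keys on both the success and the error path of aead_setkey(). Below is a minimal sketch of the key-wiping pattern only, assuming the generic crypto_authenc_extractkeys() helper; the device-programming step in the middle is hypothetical and stands in for the driver-specific work.

#include <linux/string.h>
#include <linux/crypto.h>
#include <crypto/aead.h>
#include <crypto/authenc.h>

static int example_aead_setkey(struct crypto_aead *aead, const u8 *key,
			       unsigned int keylen)
{
	struct crypto_authenc_keys keys;
	int ret;

	ret = crypto_authenc_extractkeys(&keys, key, keylen);
	if (ret)
		goto badkey;

	/* ... hypothetical: program keys.authkey / keys.enckey into hardware ... */

	memzero_explicit(&keys, sizeof(keys));	/* wipe the stack copy */
	return 0;
badkey:
	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
	memzero_explicit(&keys, sizeof(keys));	/* wipe even on failure */
	return -EINVAL;
}

The point of memzero_explicit() over plain memset() is that the compiler may not optimize the clear away, so the split authenc key material never lingers on the kernel stack.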
+ 152 - 13
drivers/crypto/caam/caamalg_desc.c

@@ -625,10 +625,13 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_givencap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize)
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi)
 {
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1,
 	    *zero_assoc_jump_cmd2;
@@ -650,11 +653,35 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		append_math_sub_imm_u32(desc, VARSEQOUTLEN, SEQINLEN, IMM,
+					ivsize);
+	} else {
+		append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0,
+				CAAM_CMD_SZ);
+	}
+
 	/* if assoclen + cryptlen is ZERO, skip to ICV write */
-	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
 						 JUMP_COND_MATH_Z);
 
+	if (is_qi)
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+
 	/* if assoclen is ZERO, skip reading the assoc data */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
@@ -686,8 +713,11 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
 
-	/* jump the zero-payload commands */
-	append_jump(desc, JUMP_TEST_ALL | 2);
+	/* jump to ICV writing */
+	if (is_qi)
+		append_jump(desc, JUMP_TEST_ALL | 4);
+	else
+		append_jump(desc, JUMP_TEST_ALL | 2);
 
 	/* zero-payload commands */
 	set_jump_tgt_here(desc, zero_payload_jump_cmd);
@@ -695,10 +725,18 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	/* read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
+	if (is_qi)
+		/* jump to ICV writing */
+		append_jump(desc, JUMP_TEST_ALL | 2);
 
 	/* There is no input data */
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
 
+	if (is_qi)
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 |
+				     FIFOLD_TYPE_LAST1);
+
 	/* write ICV */
 	append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
 			 LDST_SRCDST_BYTE_CONTEXT);
@@ -715,10 +753,13 @@ EXPORT_SYMBOL(cnstr_shdsc_gcm_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize)
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi)
 {
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1;
 
@@ -739,6 +780,24 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* if assoclen is ZERO, skip reading the assoc data */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
@@ -791,10 +850,13 @@ EXPORT_SYMBOL(cnstr_shdsc_gcm_decap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd;
 
@@ -815,7 +877,29 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 
-	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
+	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 	/* Read assoc data */
@@ -823,7 +907,7 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
 	/* Skip IV */
-	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 	/* Will read cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
@@ -862,10 +946,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd;
 
@@ -887,7 +974,29 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
+	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 	/* Read assoc data */
@@ -895,7 +1004,7 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
 	/* Skip IV */
-	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 	/* Will read cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
@@ -934,10 +1043,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
@@ -958,6 +1070,18 @@ void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 
+	if (is_qi) {
+		/* assoclen is not needed, skip it */
+		append_seq_fifo_load(desc, 4, FIFOLD_CLASS_SKIP);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* assoclen + cryptlen = seqinlen */
 	append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
 
@@ -1004,10 +1128,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
@@ -1028,6 +1155,18 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
+	if (is_qi) {
+		/* assoclen is not needed, skip it */
+		append_seq_fifo_load(desc, 4, FIFOLD_CLASS_SKIP);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* assoclen + cryptlen = seqoutlen */
 	append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 

+ 18 - 6
drivers/crypto/caam/caamalg_desc.h

@@ -27,14 +27,20 @@
 #define DESC_GCM_BASE			(3 * CAAM_CMD_SZ)
 #define DESC_GCM_ENC_LEN		(DESC_GCM_BASE + 16 * CAAM_CMD_SZ)
 #define DESC_GCM_DEC_LEN		(DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
+#define DESC_QI_GCM_ENC_LEN		(DESC_GCM_ENC_LEN + 6 * CAAM_CMD_SZ)
+#define DESC_QI_GCM_DEC_LEN		(DESC_GCM_DEC_LEN + 3 * CAAM_CMD_SZ)
 
 #define DESC_RFC4106_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
 #define DESC_RFC4106_DEC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4106_ENC_LEN		(DESC_RFC4106_ENC_LEN + 5 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4106_DEC_LEN		(DESC_RFC4106_DEC_LEN + 5 * CAAM_CMD_SZ)
 
 #define DESC_RFC4543_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_RFC4543_ENC_LEN		(DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
 #define DESC_RFC4543_DEC_LEN		(DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4543_ENC_LEN		(DESC_RFC4543_ENC_LEN + 4 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4543_DEC_LEN		(DESC_RFC4543_DEC_LEN + 4 * CAAM_CMD_SZ)
 
 #define DESC_ABLKCIPHER_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_ABLKCIPHER_ENC_LEN		(DESC_ABLKCIPHER_BASE + \
@@ -67,22 +73,28 @@ void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
 			       const bool is_qi, int era);
 
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize);
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi);
 
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize);
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi);
 
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
 				  unsigned int ivsize, const bool is_rfc3686,

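With the prototype change above, every caller of the GCM-family descriptor constructors now passes the IV size plus a flag saying whether the descriptor will run on the QI (queue interface) backend. A sketch of the two call styles, matching the job-ring and caam/qi hunks in this merge; the helper and its parameters are illustrative, not part of the patch, and it assumes it lives inside the caam driver next to caamalg_desc.h.

#include "caamalg_desc.h"

static void example_build_gcm_encap(u32 *desc_jr, u32 *desc_qi,
				    struct alginfo *cdata,
				    unsigned int ivsize, unsigned int icvsize)
{
	/* job-ring backend (caamalg.c): IV is provided via the job descriptor */
	cnstr_shdsc_gcm_encap(desc_jr, cdata, ivsize, icvsize, false);

	/* caam/qi backend (caamalg_qi.c): the shared descriptor itself loads
	 * assoclen and the IV from the input frame, hence is_qi = true
	 */
	cnstr_shdsc_gcm_encap(desc_qi, cdata, ivsize, icvsize, true);
}

The DESC_QI_*_LEN macros added above account for the extra SEQ LOAD/FIFO LOAD commands that only the is_qi variants emit.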
+ 387 - 1
drivers/crypto/caam/caamalg_qi.c

@@ -278,12 +278,317 @@ skip_split_key:
 		}
 	}
 
+	memzero_explicit(&keys, sizeof(keys));
 	return ret;
 badkey:
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }
 
+static int gcm_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_gcm_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+			      ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_gcm_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+			      ctx->authsize, true);
+
+	return 0;
+}
+
+static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	gcm_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int gcm_setkey(struct crypto_aead *aead,
+		      const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, ctx->dir);
+	ctx->cdata.keylen = keylen;
+
+	ret = gcm_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rfc4106_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4106_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4106_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4106_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4106_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	ret = rfc4106_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rfc4543_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4543_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4543_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	return 0;
+}
+
+static int rfc4543_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4543_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4543_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	ret = rfc4543_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 			     const u8 *key, unsigned int keylen)
 {
@@ -562,8 +867,18 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status)
 	qidev = caam_ctx->qidev;
 
 	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
 		caam_jr_strstatus(qidev, status);
-		ecode = -EIO;
+		/*
+		 * verify hw auth check passed else return -EBADMSG
+		 */
+		if (ssrc == JRSTA_SSRC_CCB_ERROR &&
+		    err_id == JRSTA_CCBERR_ERRID_ICVCHK)
+			ecode = -EBADMSG;
+		else
+			ecode = -EIO;
 	}
 
 	edesc = container_of(drv_req, typeof(*edesc), drv_req);
@@ -807,6 +1122,22 @@ static int aead_decrypt(struct aead_request *req)
 	return aead_crypt(req, false);
 }
 
+static int ipsec_gcm_encrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_crypt(req, true);
+}
+
+static int ipsec_gcm_decrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_crypt(req, false);
+}
+
 static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 {
 	struct ablkcipher_edesc *edesc;
@@ -1327,6 +1658,61 @@ static struct caam_alg_template driver_algs[] = {
 };
 
 static struct caam_aead_alg driver_aeads[] = {
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4106(gcm(aes))",
+				.cra_driver_name = "rfc4106-gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4106_setkey,
+			.setauthsize = rfc4106_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4543(gcm(aes))",
+				.cra_driver_name = "rfc4543-gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4543_setkey,
+			.setauthsize = rfc4543_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	/* Galois Counter Mode */
+	{
+		.aead = {
+			.base = {
+				.cra_name = "gcm(aes)",
+				.cra_driver_name = "gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = gcm_setkey,
+			.setauthsize = gcm_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = 12,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		}
+	},
 	/* single-pass ipsec_esp descriptor */
 	{
 		.aead = {

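The caamalg_qi.c additions register gcm(aes), rfc4106(gcm(aes)) and rfc4543(gcm(aes)) as caam/qi-backed AEADs. A minimal sketch of how a kernel user would pick one up through the generic AEAD API follows; nothing in it is caam-specific, since the implementation is chosen by priority, or explicitly by requesting the "gcm-aes-caam-qi" driver name instead of the generic name.

#include <crypto/aead.h>
#include <linux/err.h>

static struct crypto_aead *example_alloc_gcm(void)
{
	/* generic name: any registered provider may serve it, including caam/qi */
	struct crypto_aead *tfm = crypto_alloc_aead("gcm(aes)", 0, 0);

	if (IS_ERR(tfm))
		return tfm;

	/* GCM uses a 12-byte IV and a tag of up to 16 bytes */
	if (crypto_aead_setauthsize(tfm, 16)) {
		crypto_free_aead(tfm);
		return ERR_PTR(-EINVAL);
	}
	return tfm;
}

Note also the aead_done() change above: an ICV check failure reported by the CCB is now surfaced as -EBADMSG rather than a generic -EIO, which is what AEAD users expect on authentication failure.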
+ 24 - 18
drivers/crypto/caam/ctrl.c

@@ -337,7 +337,8 @@ static int caam_remove(struct platform_device *pdev)
 
 	/* shut clocks off before finalizing shutdown */
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
-	clk_disable_unprepare(ctrlpriv->caam_mem);
+	if (ctrlpriv->caam_mem)
+		clk_disable_unprepare(ctrlpriv->caam_mem);
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 	if (ctrlpriv->caam_emi_slow)
 		clk_disable_unprepare(ctrlpriv->caam_emi_slow);
@@ -466,14 +467,17 @@ static int caam_probe(struct platform_device *pdev)
 	}
 	ctrlpriv->caam_ipg = clk;
 
-	clk = caam_drv_identify_clk(&pdev->dev, "mem");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_err(&pdev->dev,
-			"can't identify CAAM mem clk: %d\n", ret);
-		return ret;
+	if (!of_machine_is_compatible("fsl,imx7d") &&
+	    !of_machine_is_compatible("fsl,imx7s")) {
+		clk = caam_drv_identify_clk(&pdev->dev, "mem");
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
+			dev_err(&pdev->dev,
+				"can't identify CAAM mem clk: %d\n", ret);
+			return ret;
+		}
+		ctrlpriv->caam_mem = clk;
 	}
-	ctrlpriv->caam_mem = clk;
 
 	clk = caam_drv_identify_clk(&pdev->dev, "aclk");
 	if (IS_ERR(clk)) {
@@ -484,7 +488,9 @@ static int caam_probe(struct platform_device *pdev)
 	}
 	ctrlpriv->caam_aclk = clk;
 
-	if (!of_machine_is_compatible("fsl,imx6ul")) {
+	if (!of_machine_is_compatible("fsl,imx6ul") &&
+	    !of_machine_is_compatible("fsl,imx7d") &&
+	    !of_machine_is_compatible("fsl,imx7s")) {
 		clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
 		if (IS_ERR(clk)) {
 			ret = PTR_ERR(clk);
@@ -501,11 +507,13 @@ static int caam_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = clk_prepare_enable(ctrlpriv->caam_mem);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
-			ret);
-		goto disable_caam_ipg;
+	if (ctrlpriv->caam_mem) {
+		ret = clk_prepare_enable(ctrlpriv->caam_mem);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
+				ret);
+			goto disable_caam_ipg;
+		}
 	}
 
 	ret = clk_prepare_enable(ctrlpriv->caam_aclk);
@@ -815,9 +823,6 @@ static int caam_probe(struct platform_device *pdev)
 	return 0;
 
 caam_remove:
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(ctrlpriv->dfs_root);
-#endif
 	caam_remove(pdev);
 	return ret;
 
@@ -829,7 +834,8 @@ disable_caam_emi_slow:
 disable_caam_aclk:
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 disable_caam_mem:
-	clk_disable_unprepare(ctrlpriv->caam_mem);
+	if (ctrlpriv->caam_mem)
+		clk_disable_unprepare(ctrlpriv->caam_mem);
 disable_caam_ipg:
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
 	return ret;


+ 9 - 2
drivers/crypto/caam/qi.c

@@ -579,8 +579,15 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,
 
 	fd = &dqrr->fd;
 	status = be32_to_cpu(fd->status);
-	if (unlikely(status))
-		dev_err(qidev, "Error: %#x in CAAM response FD\n", status);
+	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
+		if (ssrc != JRSTA_SSRC_CCB_ERROR ||
+		    err_id != JRSTA_CCBERR_ERRID_ICVCHK)
+			dev_err(qidev, "Error: %#x in CAAM response FD\n",
+				status);
+	}
 
 	if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
 		dev_err(qidev, "Non-compound FD from CAAM\n");

+ 1 - 1
drivers/crypto/cavium/cpt/cptpf_main.c

@@ -436,7 +436,7 @@ static int cpt_device_init(struct cpt_device *cpt)
 
 	/* Reset the PF when probed first */
 	cpt_reset(cpt);
-	mdelay(100);
+	msleep(100);
 
 	/*Check BIST status*/
 	bist = (u64)cpt_check_bist_status(cpt);

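The cavium change replaces a 100 ms busy-wait after reset with a sleeping delay. A one-line sketch of the distinction, assuming process context where sleeping is allowed (the function name is illustrative only):

#include <linux/delay.h>

/*
 * mdelay() spins the CPU for the whole interval, while msleep() lets the
 * scheduler run other work; for a ~100 ms settle time in a probe path the
 * sleeping variant is the appropriate choice.
 */
static void example_post_reset_settle(void)
{
	msleep(100);
}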
+ 1 - 1
drivers/crypto/ccp/ccp-crypto-aes-cmac.c

@@ -46,7 +46,7 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
 	}
 
 	/* Update result area if supplied */
-	if (req->result)
+	if (req->result && rctx->final)
 		memcpy(req->result, rctx->iv, digest_size);
 
 e_free:

Some files were not shown because too many files changed in this diff