
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:

   - add AEAD support to crypto engine

   - allow batch registration in simd

  Algorithms:

   - add CFB mode

   - add speck block cipher

   - add sm4 block cipher

   - new test case for crct10dif

   - improve scheduling latency on ARM

   - scatter/gather support to gcm in aesni

   - convert x86 crypto algorithms to skcipher

  Drivers:

   - hmac(sha224/sha256) support in inside-secure

   - aes gcm/ccm support in stm32

   - stm32mp1 support in stm32

   - ccree driver from staging tree

   - gcm support over QI in caam

   - add ks-sa hwrng driver"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (212 commits)
  crypto: ccree - remove unused enums
  crypto: ahash - Fix early termination in hash walk
  crypto: brcm - explicitly cast cipher to hash type
  crypto: talitos - don't leak pointers to authenc keys
  crypto: qat - don't leak pointers to authenc keys
  crypto: picoxcell - don't leak pointers to authenc keys
  crypto: ixp4xx - don't leak pointers to authenc keys
  crypto: chelsio - don't leak pointers to authenc keys
  crypto: caam/qi - don't leak pointers to authenc keys
  crypto: caam - don't leak pointers to authenc keys
  crypto: lrw - Free rctx->ext with kzfree
  crypto: talitos - fix IPsec cipher in length
  crypto: Deduplicate le32_to_cpu_array() and cpu_to_le32_array()
  crypto: doc - clarify hash callbacks state machine
  crypto: api - Keep failed instances alive
  crypto: api - Make crypto_alg_lookup static
  crypto: api - Remove unused crypto_type lookup function
  crypto: chelsio - Remove declaration of static function from header
  crypto: inside-secure - hmac(sha224) support
  crypto: inside-secure - hmac(sha256) support
  ..
Linus Torvalds, 7 years ago
parent
commit
9eb31227cb
100 changed files with 9658 additions and 8439 deletions
  1. 48 0
      Documentation/crypto/crypto_engine.rst
  2. 8 0
      Documentation/crypto/devel-algos.rst
  3. 2 1
      Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
  4. 5 1
      Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
  5. 5 6
      Documentation/devicetree/bindings/rng/imx-rng.txt
  6. 21 0
      Documentation/devicetree/bindings/rng/ks-sa-rng.txt
  7. 6 1
      Documentation/devicetree/bindings/rng/omap_rng.txt
  8. 4 0
      Documentation/devicetree/bindings/rng/st,stm32-rng.txt
  9. 11 4
      MAINTAINERS
  10. 6 0
      arch/arm/crypto/Kconfig
  11. 4 0
      arch/arm/crypto/Makefile
  12. 10 9
      arch/arm/crypto/aes-cipher-core.S
  13. 432 0
      arch/arm/crypto/speck-neon-core.S
  14. 288 0
      arch/arm/crypto/speck-neon-glue.c
  15. 6 0
      arch/arm64/crypto/Kconfig
  16. 5 3
      arch/arm64/crypto/Makefile
  17. 23 24
      arch/arm64/crypto/aes-ce-ccm-glue.c
  18. 46 49
      arch/arm64/crypto/aes-glue.c
  19. 118 237
      arch/arm64/crypto/aes-modes.S
  20. 22 26
      arch/arm64/crypto/aes-neonbs-glue.c
  21. 9 3
      arch/arm64/crypto/chacha20-neon-glue.c
  22. 23 13
      arch/arm64/crypto/sha256-glue.c
  23. 352 0
      arch/arm64/crypto/speck-neon-core.S
  24. 282 0
      arch/arm64/crypto/speck-neon-glue.c
  25. 693 721
      arch/x86/crypto/aesni-intel_asm.S
  26. 206 24
      arch/x86/crypto/aesni-intel_glue.c
  27. 113 117
      arch/x86/crypto/blowfish_glue.c
  28. 99 392
      arch/x86/crypto/camellia_aesni_avx2_glue.c
  29. 118 377
      arch/x86/crypto/camellia_aesni_avx_glue.c
  30. 79 277
      arch/x86/crypto/camellia_glue.c
  31. 127 225
      arch/x86/crypto/cast5_avx_glue.c
  32. 100 389
      arch/x86/crypto/cast6_avx_glue.c
  33. 119 119
      arch/x86/crypto/des3_ede_glue.c
  34. 125 266
      arch/x86/crypto/glue_helper.c
  35. 97 381
      arch/x86/crypto/serpent_avx2_glue.c
  36. 115 403
      arch/x86/crypto/serpent_avx_glue.c
  37. 69 450
      arch/x86/crypto/serpent_sse2_glue.c
  38. 3 25
      arch/x86/crypto/sha1-mb/sha1_mb.c
  39. 3 5
      arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
  40. 3 24
      arch/x86/crypto/sha256-mb/sha256_mb.c
  41. 3 5
      arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
  42. 3 27
      arch/x86/crypto/sha512-mb/sha512_mb.c
  43. 3 5
      arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
  44. 121 372
      arch/x86/crypto/twofish_avx_glue.c
  45. 67 272
      arch/x86/crypto/twofish_glue_3way.c
  46. 5 11
      arch/x86/include/asm/crypto/camellia.h
  47. 12 63
      arch/x86/include/asm/crypto/glue_helper.h
  48. 5 12
      arch/x86/include/asm/crypto/serpent-avx.h
  49. 0 19
      arch/x86/include/asm/crypto/twofish.h
  50. 69 60
      crypto/Kconfig
  51. 3 1
      crypto/Makefile
  52. 0 150
      crypto/ablk_helper.c
  53. 6 19
      crypto/ahash.c
  54. 0 8
      crypto/algapi.c
  55. 16 18
      crypto/api.c
  56. 353 0
      crypto/cfb.c
  57. 164 137
      crypto/crypto_engine.c
  58. 1 1
      crypto/crypto_user.c
  59. 17 6
      crypto/ecc.c
  60. 17 6
      crypto/ecdh.c
  61. 0 1
      crypto/internal.h
  62. 40 114
      crypto/lrw.c
  63. 4 30
      crypto/mcryptd.c
  64. 0 17
      crypto/md4.c
  65. 0 17
      crypto/md5.c
  66. 1 1
      crypto/rsa-pkcs1pad.c
  67. 50 0
      crypto/simd.c
  68. 244 0
      crypto/sm4_generic.c
  69. 307 0
      crypto/speck.c
  70. 3 0
      crypto/tcrypt.c
  71. 45 0
      crypto/testmgr.c
  72. 3321 1445
      crypto/testmgr.h
  73. 0 72
      crypto/xts.c
  74. 7 0
      drivers/char/hw_random/Kconfig
  75. 1 0
      drivers/char/hw_random/Makefile
  76. 2 0
      drivers/char/hw_random/bcm2835-rng.c
  77. 1 1
      drivers/char/hw_random/cavium-rng-vf.c
  78. 1 1
      drivers/char/hw_random/cavium-rng.c
  79. 1 1
      drivers/char/hw_random/imx-rngc.c
  80. 257 0
      drivers/char/hw_random/ks-sa-rng.c
  81. 14 9
      drivers/char/hw_random/mxc-rnga.c
  82. 18 4
      drivers/char/hw_random/omap-rng.c
  83. 28 16
      drivers/char/hw_random/stm32-rng.c
  84. 27 7
      drivers/crypto/Kconfig
  85. 1 1
      drivers/crypto/Makefile
  86. 2 6
      drivers/crypto/atmel-aes.c
  87. 2 7
      drivers/crypto/atmel-sha.c
  88. 2 7
      drivers/crypto/atmel-tdes.c
  89. 2 2
      drivers/crypto/bcm/cipher.c
  90. 0 1
      drivers/crypto/bcm/util.c
  91. 0 743
      drivers/crypto/bfin_crc.c
  92. 0 124
      drivers/crypto/bfin_crc.h
  93. 15 6
      drivers/crypto/caam/caamalg.c
  94. 152 13
      drivers/crypto/caam/caamalg_desc.c
  95. 18 6
      drivers/crypto/caam/caamalg_desc.h
  96. 387 1
      drivers/crypto/caam/caamalg_qi.c
  97. 24 18
      drivers/crypto/caam/ctrl.c
  98. 9 2
      drivers/crypto/caam/qi.c
  99. 1 1
      drivers/crypto/cavium/cpt/cptpf_main.c
  100. 1 1
      drivers/crypto/ccp/ccp-crypto-aes-cmac.c

+ 48 - 0
Documentation/crypto/crypto_engine.rst

@@ -0,0 +1,48 @@
+=============
+CRYPTO ENGINE
+=============
+
+Overview
+--------
+The crypto engine API (CE), is a crypto queue manager.
+
+Requirement
+-----------
+You have to put the struct crypto_engine_ctx at the start of your tfm_ctx:
+struct your_tfm_ctx {
+        struct crypto_engine_ctx enginectx;
+        ...
+};
+Why: Since CE manages only crypto_async_request, it cannot know the underlying
+request type and so only has access to the TFM.
+So using container_of for accessing __ctx is impossible.
+Furthermore, the crypto engine cannot know the "struct your_tfm_ctx",
+so it must assume that crypto_engine_ctx is at the start of it.
+
+Order of operations
+-------------------
+You have to obtain a struct crypto_engine via crypto_engine_alloc_init().
+And start it via crypto_engine_start().
+
+Before transferring any request, you have to fill the enginectx.
+- prepare_request: (taking a function pointer) If you need to do some processing before doing the request
+- unprepare_request: (taking a function pointer) Undoing what's done in prepare_request
+- do_one_request: (taking a function pointer) Do encryption for current request
+
+Note that those three functions get the crypto_async_request associated with the received request.
+So you need to get the original request via container_of(areq, struct yourrequesttype_request, base);
+
+When your driver receives a crypto_request, you have to transfer it to
+the crypto engine via one of:
+- crypto_transfer_ablkcipher_request_to_engine()
+- crypto_transfer_aead_request_to_engine()
+- crypto_transfer_akcipher_request_to_engine()
+- crypto_transfer_hash_request_to_engine()
+- crypto_transfer_skcipher_request_to_engine()
+
+At the end of the request process, a call to one of the following functions is needed:
+- crypto_finalize_ablkcipher_request
+- crypto_finalize_aead_request
+- crypto_finalize_akcipher_request
+- crypto_finalize_hash_request
+- crypto_finalize_skcipher_request
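For orientation, here is a minimal, hedged sketch of how a skcipher driver could use the API described above. Names such as my_tfm_ctx, my_do_one_request and my_engine are illustrative assumptions, not part of this patch, and hardware handling is elided:

#include <crypto/engine.h>
#include <crypto/internal/skcipher.h>

static struct crypto_engine *my_engine;	/* from crypto_engine_alloc_init() + crypto_engine_start() */

struct my_tfm_ctx {
	struct crypto_engine_ctx enginectx;	/* must be the first member */
	/* driver-specific state (keys, hardware handles, ...) */
};

static int my_do_one_request(struct crypto_engine *engine, void *areq)
{
	struct skcipher_request *req =
		container_of(areq, struct skcipher_request, base);

	/* program the hardware for this request ... */

	/* once the hardware is done (typically from the IRQ handler): */
	crypto_finalize_skcipher_request(engine, req, 0);
	return 0;
}

static int my_init_tfm(struct crypto_skcipher *tfm)
{
	struct my_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);

	ctx->enginectx.op.do_one_request = my_do_one_request;
	ctx->enginectx.op.prepare_request = NULL;	/* optional hook */
	ctx->enginectx.op.unprepare_request = NULL;	/* optional hook */
	return 0;
}

static int my_encrypt(struct skcipher_request *req)
{
	/* the .encrypt entry point only queues the request on the engine */
	return crypto_transfer_skcipher_request_to_engine(my_engine, req);
}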

+ 8 - 0
Documentation/crypto/devel-algos.rst

@@ -236,6 +236,14 @@ when used from another part of the kernel.
                                |
                                '---------------> HASH2
 
+Note that it is perfectly legal to "abandon" a request object:
+- call .init() and then (as many times) .update()
+- _not_ call any of .final(), .finup() or .export() at any point in future
+
+In other words implementations should mind the resource allocation and clean-up.
+No resources related to request objects should remain allocated after a call
+to .init() or .update(), since there might be no chance to free them.
+
 
 Specifics Of Asynchronous HASH Transformation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
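A hedged illustration of the "abandoned request" rule added in this hunk (not taken from the patch; the names are hypothetical): an ahash implementation that keeps all per-request state inside the request context, so a request that is initialised and updated but never finalised cannot leak anything:

#include <crypto/internal/hash.h>
#include <crypto/sha.h>

struct my_hash_req_ctx {
	u32 state[SHA256_DIGEST_SIZE / 4];	/* running digest */
	u8 buffer[SHA256_BLOCK_SIZE];		/* unprocessed partial block */
	u64 count;
};

static int my_hash_update(struct ahash_request *req)
{
	struct my_hash_req_ctx *rctx = ahash_request_ctx(req);

	/*
	 * Consume req->src into rctx->state/rctx->buffer only.  Nothing is
	 * kmalloc()ed or mapped here that outlives this call, so abandoning
	 * the request after .update() leaks no resources.
	 */
	return 0;
}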

+ 2 - 1
Documentation/devicetree/bindings/crypto/arm-cryptocell.txt

@@ -1,7 +1,8 @@
 Arm TrustZone CryptoCell cryptographic engine
 
 Required properties:
-- compatible: Should be "arm,cryptocell-712-ree".
+- compatible: Should be one of: "arm,cryptocell-712-ree",
+  "arm,cryptocell-710-ree" or "arm,cryptocell-630p-ree".
 - reg: Base physical address of the engine and length of memory mapped region.
 - interrupts: Interrupt number for the device.
 

+ 5 - 1
Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt

@@ -8,7 +8,11 @@ Required properties:
 - interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
 
 Optional properties:
-- clocks: Reference to the crypto engine clock.
+- clocks: Reference to the crypto engine clocks, the second clock is
+          needed for the Armada 7K/8K SoCs.
+- clock-names: mandatory if there is a second clock, in this case the
+               name must be "core" for the first clock and "reg" for
+               the second one.
 
 Example:
 

+ 5 - 6
Documentation/devicetree/bindings/rng/imx-rngc.txt → Documentation/devicetree/bindings/rng/imx-rng.txt

@@ -1,15 +1,14 @@
-Freescale RNGC (Random Number Generator Version C)
-
-The driver also supports version B, which is mostly compatible
-to version C.
+Freescale RNGA/RNGB/RNGC (Random Number Generator Versions A, B and C)
 
 
 Required properties:
 - compatible : should be one of
+               "fsl,imx21-rnga"
+               "fsl,imx31-rnga" (backward compatible with "fsl,imx21-rnga")
                "fsl,imx25-rngb"
                "fsl,imx35-rngc"
 - reg : offset and length of the register set of this block
-- interrupts : the interrupt number for the RNGC block
-- clocks : the RNGC clk source
+- interrupts : the interrupt number for the RNG block
+- clocks : the RNG clk source
 
 Example:
 

+ 21 - 0
Documentation/devicetree/bindings/rng/ks-sa-rng.txt

@@ -0,0 +1,21 @@
+Keystone SoC Hardware Random Number Generator(HWRNG) Module
+
+On Keystone SoCs HWRNG module is a submodule of the Security Accelerator.
+
+- compatible: should be "ti,keystone-rng"
+- ti,syscon-sa-cfg: phandle to syscon node of the SA configuration registers.
+		    These registers are shared between the hwrng and crypto drivers.
+- clocks: phandle to the reference clocks for the subsystem
+- clock-names: functional clock name. Should be set to "fck"
+- reg: HWRNG module register space
+
+Example:
+/* K2HK */
+
+rng@24000 {
+	compatible = "ti,keystone-rng";
+	ti,syscon-sa-cfg = <&sa_config>;
+	clocks = <&clksa>;
+	clock-names = "fck";
+	reg = <0x24000 0x1000>;
+};

+ 6 - 1
Documentation/devicetree/bindings/rng/omap_rng.txt

@@ -13,7 +13,12 @@ Required properties:
 - interrupts : the interrupt number for the RNG module.
 		Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
 - clocks: the trng clock source. Only mandatory for the
-  "inside-secure,safexcel-eip76" compatible.
+  "inside-secure,safexcel-eip76" compatible, the second clock is
+  needed for the Armada 7K/8K SoCs
+- clock-names: mandatory if there is a second clock, in this case the
+  name must be "core" for the first clock and "reg" for the second
+  one
+
 
 Example:
 /* AM335x */

+ 4 - 0
Documentation/devicetree/bindings/rng/st,stm32-rng.txt

@@ -11,6 +11,10 @@ Required properties:
 - interrupts : The designated IRQ line for the RNG
 - clocks : The clock needed to enable the RNG
 
+Optional properties:
+- resets : The reset to properly start RNG
+- clock-error-detect : Enable the clock detection management
+
 Example:
 
 	rng: rng@50060800 {

+ 11 - 4
MAINTAINERS

@@ -3252,12 +3252,11 @@ F:	drivers/net/ieee802154/cc2520.c
 F:	include/linux/spi/cc2520.h
 F:	Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
 
-CCREE ARM TRUSTZONE CRYPTOCELL 700 REE DRIVER
+CCREE ARM TRUSTZONE CRYPTOCELL REE DRIVER
 M:	Gilad Ben-Yossef <gilad@benyossef.com>
 L:	linux-crypto@vger.kernel.org
-L:	driverdev-devel@linuxdriverproject.org
 S:	Supported
-F:	drivers/staging/ccree/
+F:	drivers/crypto/ccree/
 W:	https://developer.arm.com/products/system-ip/trustzone-cryptocell/cryptocell-700-family
 
 CEC FRAMEWORK
@@ -6962,7 +6961,7 @@ F:	drivers/input/input-mt.c
 K:	\b(ABS|SYN)_MT_
 
 INSIDE SECURE CRYPTO DRIVER
-M:	Antoine Tenart <antoine.tenart@free-electrons.com>
+M:	Antoine Tenart <antoine.tenart@bootlin.com>
 F:	drivers/crypto/inside-secure/
 S:	Maintained
 L:	linux-crypto@vger.kernel.org
@@ -7200,6 +7199,14 @@ L:	linux-rdma@vger.kernel.org
 S:	Supported
 F:	drivers/infiniband/hw/i40iw/
 
+INTEL SHA MULTIBUFFER DRIVER
+M:	Megha Dey <megha.dey@linux.intel.com>
+R:	Tim Chen <tim.c.chen@linux.intel.com>
+L:	linux-crypto@vger.kernel.org
+S:	Supported
+F:	arch/x86/crypto/sha*-mb
+F:	crypto/mcryptd.c
+
 INTEL TELEMETRY DRIVER
 M:	Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
 L:	platform-driver-x86@vger.kernel.org

+ 6 - 0
arch/arm/crypto/Kconfig

@@ -121,4 +121,10 @@ config CRYPTO_CHACHA20_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20
 
+config CRYPTO_SPECK_NEON
+	tristate "NEON accelerated Speck cipher algorithms"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SPECK
+
 endif

+ 4 - 0
arch/arm/crypto/Makefile

@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -53,7 +54,9 @@ ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
 crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+speck-neon-y := speck-neon-core.o speck-neon-glue.o
 
+ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
@@ -62,5 +65,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
 	$(call cmd,perl)
+endif
 
 .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S

+ 10 - 9
arch/arm/crypto/aes-cipher-core.S

@@ -174,6 +174,16 @@
 	.ltorg
 	.endm
 
+ENTRY(__aes_arm_encrypt)
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ENDPROC(__aes_arm_encrypt)
+
+	.align		5
+ENTRY(__aes_arm_decrypt)
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
+ENDPROC(__aes_arm_decrypt)
+
+	.section	".rodata", "a"
 	.align		L1_CACHE_SHIFT
 	.type		__aes_arm_inverse_sbox, %object
 __aes_arm_inverse_sbox:
@@ -210,12 +220,3 @@ __aes_arm_inverse_sbox:
 	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
-
-ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-ENDPROC(__aes_arm_encrypt)
-
-	.align		5
-ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
-ENDPROC(__aes_arm_decrypt)

+ 432 - 0
arch/arm/crypto/speck-neon-core.S

@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+	.fpu		neon
+
+	// arguments
+	ROUND_KEYS	.req	r0	// const {u64,u32} *round_keys
+	NROUNDS		.req	r1	// int nrounds
+	DST		.req	r2	// void *dst
+	SRC		.req	r3	// const void *src
+	NBYTES		.req	r4	// unsigned int nbytes
+	TWEAK		.req	r5	// void *tweak
+
+	// registers which hold the data being encrypted/decrypted
+	X0		.req	q0
+	X0_L		.req	d0
+	X0_H		.req	d1
+	Y0		.req	q1
+	Y0_H		.req	d3
+	X1		.req	q2
+	X1_L		.req	d4
+	X1_H		.req	d5
+	Y1		.req	q3
+	Y1_H		.req	d7
+	X2		.req	q4
+	X2_L		.req	d8
+	X2_H		.req	d9
+	Y2		.req	q5
+	Y2_H		.req	d11
+	X3		.req	q6
+	X3_L		.req	d12
+	X3_H		.req	d13
+	Y3		.req	q7
+	Y3_H		.req	d15
+
+	// the round key, duplicated in all lanes
+	ROUND_KEY	.req	q8
+	ROUND_KEY_L	.req	d16
+	ROUND_KEY_H	.req	d17
+
+	// index vector for vtbl-based 8-bit rotates
+	ROTATE_TABLE	.req	d18
+
+	// multiplication table for updating XTS tweaks
+	GF128MUL_TABLE	.req	d19
+	GF64MUL_TABLE	.req	d19
+
+	// current XTS tweak value(s)
+	TWEAKV		.req	q10
+	TWEAKV_L	.req	d20
+	TWEAKV_H	.req	d21
+
+	TMP0		.req	q12
+	TMP0_L		.req	d24
+	TMP0_H		.req	d25
+	TMP1		.req	q13
+	TMP2		.req	q14
+	TMP3		.req	q15
+
+	.align		4
+.Lror64_8_table:
+	.byte		1, 2, 3, 4, 5, 6, 7, 0
+.Lror32_8_table:
+	.byte		1, 2, 3, 0, 5, 6, 7, 4
+.Lrol64_8_table:
+	.byte		7, 0, 1, 2, 3, 4, 5, 6
+.Lrol32_8_table:
+	.byte		3, 0, 1, 2, 7, 4, 5, 6
+.Lgf128mul_table:
+	.byte		0, 0x87
+	.fill		14
+.Lgf64mul_table:
+	.byte		0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
+	.fill		12
+
+/*
+ * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
+ *
+ * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
+ * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
+ * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
+ *
+ * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
+ * the vtbl approach is faster on some processors and the same speed on others.
+ */
+.macro _speck_round_128bytes	n
+
+	// x = ror(x, 8)
+	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
+	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
+	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
+	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
+	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
+	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
+	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
+	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
+
+	// x += y
+	vadd.u\n	X0, Y0
+	vadd.u\n	X1, Y1
+	vadd.u\n	X2, Y2
+	vadd.u\n	X3, Y3
+
+	// x ^= k
+	veor		X0, ROUND_KEY
+	veor		X1, ROUND_KEY
+	veor		X2, ROUND_KEY
+	veor		X3, ROUND_KEY
+
+	// y = rol(y, 3)
+	vshl.u\n	TMP0, Y0, #3
+	vshl.u\n	TMP1, Y1, #3
+	vshl.u\n	TMP2, Y2, #3
+	vshl.u\n	TMP3, Y3, #3
+	vsri.u\n	TMP0, Y0, #(\n - 3)
+	vsri.u\n	TMP1, Y1, #(\n - 3)
+	vsri.u\n	TMP2, Y2, #(\n - 3)
+	vsri.u\n	TMP3, Y3, #(\n - 3)
+
+	// y ^= x
+	veor		Y0, TMP0, X0
+	veor		Y1, TMP1, X1
+	veor		Y2, TMP2, X2
+	veor		Y3, TMP3, X3
+.endm
+
+/*
+ * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
+ *
+ * This is the inverse of _speck_round_128bytes().
+ */
+.macro _speck_unround_128bytes	n
+
+	// y ^= x
+	veor		TMP0, Y0, X0
+	veor		TMP1, Y1, X1
+	veor		TMP2, Y2, X2
+	veor		TMP3, Y3, X3
+
+	// y = ror(y, 3)
+	vshr.u\n	Y0, TMP0, #3
+	vshr.u\n	Y1, TMP1, #3
+	vshr.u\n	Y2, TMP2, #3
+	vshr.u\n	Y3, TMP3, #3
+	vsli.u\n	Y0, TMP0, #(\n - 3)
+	vsli.u\n	Y1, TMP1, #(\n - 3)
+	vsli.u\n	Y2, TMP2, #(\n - 3)
+	vsli.u\n	Y3, TMP3, #(\n - 3)
+
+	// x ^= k
+	veor		X0, ROUND_KEY
+	veor		X1, ROUND_KEY
+	veor		X2, ROUND_KEY
+	veor		X3, ROUND_KEY
+
+	// x -= y
+	vsub.u\n	X0, Y0
+	vsub.u\n	X1, Y1
+	vsub.u\n	X2, Y2
+	vsub.u\n	X3, Y3
+
+	// x = rol(x, 8);
+	vtbl.8		X0_L, {X0_L}, ROTATE_TABLE
+	vtbl.8		X0_H, {X0_H}, ROTATE_TABLE
+	vtbl.8		X1_L, {X1_L}, ROTATE_TABLE
+	vtbl.8		X1_H, {X1_H}, ROTATE_TABLE
+	vtbl.8		X2_L, {X2_L}, ROTATE_TABLE
+	vtbl.8		X2_H, {X2_H}, ROTATE_TABLE
+	vtbl.8		X3_L, {X3_L}, ROTATE_TABLE
+	vtbl.8		X3_H, {X3_H}, ROTATE_TABLE
+.endm
+
+.macro _xts128_precrypt_one	dst_reg, tweak_buf, tmp
+
+	// Load the next source block
+	vld1.8		{\dst_reg}, [SRC]!
+
+	// Save the current tweak in the tweak buffer
+	vst1.8		{TWEAKV}, [\tweak_buf:128]!
+
+	// XOR the next source block with the current tweak
+	veor		\dst_reg, TWEAKV
+
+	/*
+	 * Calculate the next tweak by multiplying the current one by x,
+	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
+	 */
+	vshr.u64	\tmp, TWEAKV, #63
+	vshl.u64	TWEAKV, #1
+	veor		TWEAKV_H, \tmp\()_L
+	vtbl.8		\tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
+	veor		TWEAKV_L, \tmp\()_H
+.endm
+
+.macro _xts64_precrypt_two	dst_reg, tweak_buf, tmp
+
+	// Load the next two source blocks
+	vld1.8		{\dst_reg}, [SRC]!
+
+	// Save the current two tweaks in the tweak buffer
+	vst1.8		{TWEAKV}, [\tweak_buf:128]!
+
+	// XOR the next two source blocks with the current two tweaks
+	veor		\dst_reg, TWEAKV
+
+	/*
+	 * Calculate the next two tweaks by multiplying the current ones by x^2,
+	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
+	 */
+	vshr.u64	\tmp, TWEAKV, #62
+	vshl.u64	TWEAKV, #2
+	vtbl.8		\tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
+	vtbl.8		\tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
+	veor		TWEAKV, \tmp
+.endm
+
+/*
+ * _speck_xts_crypt() - Speck-XTS encryption/decryption
+ *
+ * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
+ * using Speck-XTS, specifically the variant with a block size of '2n' and round
+ * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
+ * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
+ * nonzero multiple of 128.
+ */
+.macro _speck_xts_crypt	n, decrypting
+	push		{r4-r7}
+	mov		r7, sp
+
+	/*
+	 * The first four parameters were passed in registers r0-r3.  Load the
+	 * additional parameters, which were passed on the stack.
+	 */
+	ldr		NBYTES, [sp, #16]
+	ldr		TWEAK, [sp, #20]
+
+	/*
+	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
+	 * round key rather than the first, since for decryption the round keys
+	 * are used in reverse order.
+	 */
+.if \decrypting
+.if \n == 64
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
+	sub		ROUND_KEYS, #8
+.else
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
+	sub		ROUND_KEYS, #4
+.endif
+.endif
+
+	// Load the index vector for vtbl-based 8-bit rotates
+.if \decrypting
+	ldr		r12, =.Lrol\n\()_8_table
+.else
+	ldr		r12, =.Lror\n\()_8_table
+.endif
+	vld1.8		{ROTATE_TABLE}, [r12:64]
+
+	// One-time XTS preparation
+
+	/*
+	 * Allocate stack space to store 128 bytes worth of tweaks.  For
+	 * performance, this space is aligned to a 16-byte boundary so that we
+	 * can use the load/store instructions that declare 16-byte alignment.
+	 */
+	sub		sp, #128
+	bic		sp, #0xf
+
+.if \n == 64
+	// Load first tweak
+	vld1.8		{TWEAKV}, [TWEAK]
+
+	// Load GF(2^128) multiplication table
+	ldr		r12, =.Lgf128mul_table
+	vld1.8		{GF128MUL_TABLE}, [r12:64]
+.else
+	// Load first tweak
+	vld1.8		{TWEAKV_L}, [TWEAK]
+
+	// Load GF(2^64) multiplication table
+	ldr		r12, =.Lgf64mul_table
+	vld1.8		{GF64MUL_TABLE}, [r12:64]
+
+	// Calculate second tweak, packing it together with the first
+	vshr.u64	TMP0_L, TWEAKV_L, #63
+	vtbl.u8		TMP0_L, {GF64MUL_TABLE}, TMP0_L
+	vshl.u64	TWEAKV_H, TWEAKV_L, #1
+	veor		TWEAKV_H, TMP0_L
+.endif
+
+.Lnext_128bytes_\@:
+
+	/*
+	 * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
+	 * values, and save the tweaks on the stack for later.  Then
+	 * de-interleave the 'x' and 'y' elements of each block, i.e. make it so
+	 * that the X[0-3] registers contain only the second halves of blocks,
+	 * and the Y[0-3] registers contain only the first halves of blocks.
+	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
+	 */
+	mov		r12, sp
+.if \n == 64
+	_xts128_precrypt_one	X0, r12, TMP0
+	_xts128_precrypt_one	Y0, r12, TMP0
+	_xts128_precrypt_one	X1, r12, TMP0
+	_xts128_precrypt_one	Y1, r12, TMP0
+	_xts128_precrypt_one	X2, r12, TMP0
+	_xts128_precrypt_one	Y2, r12, TMP0
+	_xts128_precrypt_one	X3, r12, TMP0
+	_xts128_precrypt_one	Y3, r12, TMP0
+	vswp		X0_L, Y0_H
+	vswp		X1_L, Y1_H
+	vswp		X2_L, Y2_H
+	vswp		X3_L, Y3_H
+.else
+	_xts64_precrypt_two	X0, r12, TMP0
+	_xts64_precrypt_two	Y0, r12, TMP0
+	_xts64_precrypt_two	X1, r12, TMP0
+	_xts64_precrypt_two	Y1, r12, TMP0
+	_xts64_precrypt_two	X2, r12, TMP0
+	_xts64_precrypt_two	Y2, r12, TMP0
+	_xts64_precrypt_two	X3, r12, TMP0
+	_xts64_precrypt_two	Y3, r12, TMP0
+	vuzp.32		Y0, X0
+	vuzp.32		Y1, X1
+	vuzp.32		Y2, X2
+	vuzp.32		Y3, X3
+.endif
+
+	// Do the cipher rounds
+
+	mov		r12, ROUND_KEYS
+	mov		r6, NROUNDS
+
+.Lnext_round_\@:
+.if \decrypting
+.if \n == 64
+	vld1.64		ROUND_KEY_L, [r12]
+	sub		r12, #8
+	vmov		ROUND_KEY_H, ROUND_KEY_L
+.else
+	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
+	sub		r12, #4
+.endif
+	_speck_unround_128bytes	\n
+.else
+.if \n == 64
+	vld1.64		ROUND_KEY_L, [r12]!
+	vmov		ROUND_KEY_H, ROUND_KEY_L
+.else
+	vld1.32		{ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
+.endif
+	_speck_round_128bytes	\n
+.endif
+	subs		r6, r6, #1
+	bne		.Lnext_round_\@
+
+	// Re-interleave the 'x' and 'y' elements of each block
+.if \n == 64
+	vswp		X0_L, Y0_H
+	vswp		X1_L, Y1_H
+	vswp		X2_L, Y2_H
+	vswp		X3_L, Y3_H
+.else
+	vzip.32		Y0, X0
+	vzip.32		Y1, X1
+	vzip.32		Y2, X2
+	vzip.32		Y3, X3
+.endif
+
+	// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
+	mov		r12, sp
+	vld1.8		{TMP0, TMP1}, [r12:128]!
+	vld1.8		{TMP2, TMP3}, [r12:128]!
+	veor		X0, TMP0
+	veor		Y0, TMP1
+	veor		X1, TMP2
+	veor		Y1, TMP3
+	vld1.8		{TMP0, TMP1}, [r12:128]!
+	vld1.8		{TMP2, TMP3}, [r12:128]!
+	veor		X2, TMP0
+	veor		Y2, TMP1
+	veor		X3, TMP2
+	veor		Y3, TMP3
+
+	// Store the ciphertext in the destination buffer
+	vst1.8		{X0, Y0}, [DST]!
+	vst1.8		{X1, Y1}, [DST]!
+	vst1.8		{X2, Y2}, [DST]!
+	vst1.8		{X3, Y3}, [DST]!
+
+	// Continue if there are more 128-byte chunks remaining, else return
+	subs		NBYTES, #128
+	bne		.Lnext_128bytes_\@
+
+	// Store the next tweak
+.if \n == 64
+	vst1.8		{TWEAKV}, [TWEAK]
+.else
+	vst1.8		{TWEAKV_L}, [TWEAK]
+.endif
+
+	mov		sp, r7
+	pop		{r4-r7}
+	bx		lr
+.endm
+
+ENTRY(speck128_xts_encrypt_neon)
+	_speck_xts_crypt	n=64, decrypting=0
+ENDPROC(speck128_xts_encrypt_neon)
+
+ENTRY(speck128_xts_decrypt_neon)
+	_speck_xts_crypt	n=64, decrypting=1
+ENDPROC(speck128_xts_decrypt_neon)
+
+ENTRY(speck64_xts_encrypt_neon)
+	_speck_xts_crypt	n=32, decrypting=0
+ENDPROC(speck64_xts_encrypt_neon)
+
+ENTRY(speck64_xts_decrypt_neon)
+	_speck_xts_crypt	n=32, decrypting=1
+ENDPROC(speck64_xts_decrypt_neon)
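For readers less familiar with the NEON syntax, here is a scalar C sketch of what the macros above compute. It is illustrative only and not part of the patch; ror64/rol64 are the helpers from <linux/bitops.h>:

/* One Speck128 round, as in _speck_round_128bytes (Speck64 is the same with u32 and rotates mod 32). */
static inline void speck128_round(u64 *x, u64 *y, u64 k)
{
	*x = ror64(*x, 8);
	*x += *y;
	*x ^= k;
	*y = rol64(*y, 3);
	*y ^= *x;
}

/*
 * XTS tweak update as in _xts128_precrypt_one: multiply the tweak by x in
 * GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, which is where the 0x87 entry
 * in .Lgf128mul_table comes from.
 */
static inline void xts128_mul_x(u64 *lo, u64 *hi)
{
	u64 carry = *hi >> 63;

	*hi = (*hi << 1) | (*lo >> 63);
	*lo = (*lo << 1) ^ (carry ? 0x87 : 0);
}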

+ 288 - 0
arch/arm/crypto/speck-neon-glue.c

@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Note: the NIST recommendation for XTS only specifies a 128-bit block size,
+ * but a 64-bit version (needed for Speck64) is fairly straightforward; the math
+ * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
+ * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
+ * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
+ * OCB and PMAC"), represented as 0x1B.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/speck.h>
+#include <crypto/xts.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* The assembly functions only handle multiples of 128 bytes */
+#define SPECK_NEON_CHUNK_SIZE	128
+
+/* Speck128 */
+
+struct speck128_xts_tfm_ctx {
+	struct speck128_tfm_ctx main_key;
+	struct speck128_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
+				     u8 *, const u8 *);
+typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
+					  const void *, unsigned int, void *);
+
+static __always_inline int
+__speck128_xts_crypt(struct skcipher_request *req,
+		     speck128_crypt_one_t crypt_one,
+		     speck128_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	le128 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
+			gf128mul_x_ble(&tweak, &tweak);
+
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck128_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
+				    speck128_xts_encrypt_neon);
+}
+
+static int speck128_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
+				    speck128_xts_decrypt_neon);
+}
+
+static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+/* Speck64 */
+
+struct speck64_xts_tfm_ctx {
+	struct speck64_tfm_ctx main_key;
+	struct speck64_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
+				    u8 *, const u8 *);
+typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
+					 const void *, unsigned int, void *);
+
+static __always_inline int
+__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
+		    speck64_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	__le64 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			*(__le64 *)dst = *(__le64 *)src ^ tweak;
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			*(__le64 *)dst ^= tweak;
+			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
+					    ((tweak & cpu_to_le64(1ULL << 63)) ?
+					     0x1B : 0));
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck64_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
+				   speck64_xts_encrypt_neon);
+}
+
+static int speck64_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
+				   speck64_xts_decrypt_neon);
+}
+
+static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+static struct skcipher_alg speck_algs[] = {
+	{
+		.base.cra_name		= "xts(speck128)",
+		.base.cra_driver_name	= "xts-speck128-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
+		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
+		.ivsize			= SPECK128_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck128_xts_setkey,
+		.encrypt		= speck128_xts_encrypt,
+		.decrypt		= speck128_xts_decrypt,
+	}, {
+		.base.cra_name		= "xts(speck64)",
+		.base.cra_driver_name	= "xts-speck64-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
+		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
+		.ivsize			= SPECK64_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck64_xts_setkey,
+		.encrypt		= speck64_xts_encrypt,
+		.decrypt		= speck64_xts_decrypt,
+	}
+};
+
+static int __init speck_neon_module_init(void)
+{
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_neon_module_exit(void)
+{
+	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_neon_module_init);
+module_exit(speck_neon_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("xts(speck128)");
+MODULE_ALIAS_CRYPTO("xts-speck128-neon");
+MODULE_ALIAS_CRYPTO("xts(speck64)");
+MODULE_ALIAS_CRYPTO("xts-speck64-neon");
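A hedged usage sketch, not part of the patch: how a kernel user could exercise the xts(speck128) algorithm registered above through the generic skcipher API. The function name and the caller-provided scatterlist/key/IV are assumptions; error handling is abbreviated:

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int speck_xts_demo(struct scatterlist *sg, unsigned int len,
			  const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xts(speck128)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* XTS expects the data key and the tweak key concatenated */
	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
	skcipher_request_set_crypt(req, sg, sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}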

+ 6 - 0
arch/arm64/crypto/Kconfig

@@ -113,4 +113,10 @@ config CRYPTO_AES_ARM64_BS
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
+config CRYPTO_SPECK_NEON
+	tristate "NEON accelerated Speck cipher algorithms"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_SPECK
+
 endif

+ 5 - 3
arch/arm64/crypto/Makefile

@@ -53,20 +53,21 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
 chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 
+obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
+speck-neon-y := speck-neon-core.o speck-neon-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
 aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
 
-AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
-AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
-
 CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
 
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+ifdef REGENERATE_ARM64_CRYPTO
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
@@ -75,5 +76,6 @@ $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
 
 $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
+endif
 
 .PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S

+ 23 - 24
arch/arm64/crypto/aes-ce-ccm-glue.c

@@ -107,11 +107,13 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 }
 }
 
 
 static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
 static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
-			   u32 abytes, u32 *macp, bool use_neon)
+			   u32 abytes, u32 *macp)
 {
 {
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
+		kernel_neon_begin();
 		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
 		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
 				     num_rounds(key));
 				     num_rounds(key));
+		kernel_neon_end();
 	} else {
 	} else {
 		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
 		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
 			int added = min(abytes, AES_BLOCK_SIZE - *macp);
 			int added = min(abytes, AES_BLOCK_SIZE - *macp);
@@ -143,8 +145,7 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
 	}
 	}
 }
 }
 
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
-				   bool use_neon)
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 {
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -163,7 +164,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
 		ltag.len = 6;
 		ltag.len = 6;
 	}
 	}
 
 
-	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp);
 	scatterwalk_start(&walk, req->src);
 	scatterwalk_start(&walk, req->src);
 
 
 	do {
 	do {
@@ -175,7 +176,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
 			n = scatterwalk_clamp(&walk, len);
 			n = scatterwalk_clamp(&walk, len);
 		}
 		}
 		p = scatterwalk_map(&walk);
 		p = scatterwalk_map(&walk);
-		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
+		ccm_update_mac(ctx, mac, p, n, &macp);
 		len -= n;
 		len -= n;
 
 
 		scatterwalk_unmap(p);
 		scatterwalk_unmap(p);
@@ -242,43 +243,42 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
 	u32 len = req->cryptlen;
-	bool use_neon = may_use_simd();
 	int err;
 	int err;
 
 
 	err = ccm_init_mac(req, mac, len);
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 	if (err)
 		return err;
 		return err;
 
 
-	if (likely(use_neon))
-		kernel_neon_begin();
-
 	if (req->assoclen)
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac, use_neon);
+		ccm_calculate_auth_mac(req, mac);
 
 
 	/* preserve the original iv for the final round */
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
 
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
 		while (walk.nbytes) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 
 			if (walk.nbytes == walk.total)
 			if (walk.nbytes == walk.total)
 				tail = 0;
 				tail = 0;
 
 
+			kernel_neon_begin();
 			ce_aes_ccm_encrypt(walk.dst.virt.addr,
 			ce_aes_ccm_encrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
 					   num_rounds(ctx), mac, walk.iv);
+			kernel_neon_end();
 
 
 			err = skcipher_walk_done(&walk, tail);
 			err = skcipher_walk_done(&walk, tail);
 		}
 		}
-		if (!err)
+		if (!err) {
+			kernel_neon_begin();
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
 					 num_rounds(ctx));
-
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 	} else {
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
 	}
 	}
@@ -301,43 +301,42 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
 	u32 len = req->cryptlen - authsize;
-	bool use_neon = may_use_simd();
 	int err;
 	int err;
 
 
 	err = ccm_init_mac(req, mac, len);
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 	if (err)
 		return err;
 		return err;
 
 
-	if (likely(use_neon))
-		kernel_neon_begin();
-
 	if (req->assoclen)
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac, use_neon);
+		ccm_calculate_auth_mac(req, mac);
 
 
 	/* preserve the original iv for the final round */
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
 
-	if (likely(use_neon)) {
+	if (may_use_simd()) {
 		while (walk.nbytes) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 
 			if (walk.nbytes == walk.total)
 			if (walk.nbytes == walk.total)
 				tail = 0;
 				tail = 0;
 
 
+			kernel_neon_begin();
 			ce_aes_ccm_decrypt(walk.dst.virt.addr,
 			ce_aes_ccm_decrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
 					   num_rounds(ctx), mac, walk.iv);
+			kernel_neon_end();
 
 
 			err = skcipher_walk_done(&walk, tail);
 			err = skcipher_walk_done(&walk, tail);
 		}
 		}
-		if (!err)
+		if (!err) {
+			kernel_neon_begin();
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
 					 num_rounds(ctx));
-
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 	} else {
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
 	}
 	}
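The rework in this file (and the similar aes-glue.c conversion further down) implements the "improve scheduling latency on ARM" item from the pull message: the NEON unit is now claimed per processed chunk instead of across the whole AEAD/skcipher walk, so preemption is only disabled for bounded stretches of work. A simplified, hedged sketch of the resulting loop shape (not literal driver code):

	while (walk.nbytes) {
		unsigned int tail = walk.nbytes % AES_BLOCK_SIZE;

		if (walk.nbytes == walk.total)
			tail = 0;

		kernel_neon_begin();	/* preemption disabled only here ... */
		/* ... call the CE/NEON asm helper for this chunk ... */
		kernel_neon_end();	/* ... and re-enabled between chunks */

		err = skcipher_walk_done(&walk, tail);
	}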

+ 46 - 49
arch/arm64/crypto/aes-glue.c

@@ -64,17 +64,17 @@ MODULE_LICENSE("GPL v2");
 
 
 /* defined in aes-modes.S */
 /* defined in aes-modes.S */
 asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
 asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, int first);
+				int rounds, int blocks);
 asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
 asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, int first);
+				int rounds, int blocks);
 
 
 asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
 asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 iv[], int first);
+				int rounds, int blocks, u8 iv[]);
 asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
 asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 iv[], int first);
+				int rounds, int blocks, u8 iv[]);
 
 
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				int rounds, int blocks, u8 ctr[], int first);
+				int rounds, int blocks, u8 ctr[]);
 
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
 				int rounds, int blocks, u8 const rk2[], u8 iv[],
 				int rounds, int blocks, u8 const rk2[], u8 iv[],
@@ -133,19 +133,19 @@ static int ecb_encrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 	unsigned int blocks;
 
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, first);
+				(u8 *)ctx->key_enc, rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	}
-	kernel_neon_end();
 	return err;
 	return err;
 }
 }
 
 
@@ -153,19 +153,19 @@ static int ecb_decrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 	unsigned int blocks;
 
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, first);
+				(u8 *)ctx->key_dec, rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	}
-	kernel_neon_end();
 	return err;
 	return err;
 }
 }
 
 
@@ -173,20 +173,19 @@ static int cbc_encrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 	unsigned int blocks;
 
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	}
-	kernel_neon_end();
 	return err;
 	return err;
 }
 }
 
 
@@ -194,20 +193,19 @@ static int cbc_decrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	struct skcipher_walk walk;
 	unsigned int blocks;
 	unsigned int blocks;
 
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	}
-	kernel_neon_end();
 	return err;
 	return err;
 }
 }
 
 
@@ -215,20 +213,18 @@ static int ctr_encrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, first, rounds = 6 + ctx->key_length / 4;
+	int err, rounds = 6 + ctx->key_length / 4;
 	struct skcipher_walk walk;
 	struct skcipher_walk walk;
 	int blocks;
 	int blocks;
 
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	first = 1;
-	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
-				first);
+				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-		first = 0;
+		kernel_neon_end();
 	}
 	}
 	if (walk.nbytes) {
 	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
@@ -241,12 +237,13 @@ static int ctr_encrypt(struct skcipher_request *req)
 		 */
 		 */
 		blocks = -1;
 		blocks = -1;
 
 
+		kernel_neon_begin();
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
-				blocks, walk.iv, first);
+				blocks, walk.iv);
+		kernel_neon_end();
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	}
-	kernel_neon_end();
 
 
 	return err;
 	return err;
 }
 }
@@ -270,16 +267,16 @@ static int xts_encrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	unsigned int blocks;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

-	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_enc, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();

 	return err;
 }
@@ -292,16 +289,16 @@ static int xts_decrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	unsigned int blocks;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

-	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+		kernel_neon_begin();
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_dec, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();

 	return err;
 }
@@ -425,7 +422,7 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,

 	/* encrypt the zero vector */
 	kernel_neon_begin();
-	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
+	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
 	kernel_neon_end();

 	cmac_gf128_mul_by_x(consts, consts);
@@ -454,8 +451,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
 		return err;

 	kernel_neon_begin();
-	aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
-	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
+	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
+	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
 	kernel_neon_end();

 	return cbcmac_setkey(tfm, key, sizeof(key));
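
Note (not part of the diff): the change common to the hunks above is that kernel_neon_begin()/kernel_neon_end() now bracket a single walk chunk rather than the whole request, and the walk is started with atomic == false. A minimal sketch of the resulting shape, with the SIMD call elided:

static int crypt_one_request(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	int err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes) {
		kernel_neon_begin();
		/* SIMD helper processes walk.src -> walk.dst here */
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

This keeps each non-preemptible NEON region bounded by one walk chunk.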

+ 118 - 237
arch/arm64/crypto/aes-modes.S
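Note (not part of the diff): the CTR hunks below keep the byte-swapped low 64 bits of the counter in a general-purpose register and only fall back to the single-block loop when the 32-bit portion could overflow within the call. A rough C equivalent of the per-block counter update, with a hypothetical helper name:

static void be128_ctr_increment(u8 ctr[16])
{
	u64 lo = be64_to_cpu(*(__be64 *)(ctr + 8)) + 1;

	*(__be64 *)(ctr + 8) = cpu_to_be64(lo);
	if (lo == 0)	/* carry into the high 64 bits */
		*(__be64 *)ctr = cpu_to_be64(be64_to_cpu(*(__be64 *)ctr) + 1);
}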

@@ -13,127 +13,39 @@
 	.text
 	.align		4

-/*
- * There are several ways to instantiate this code:
- * - no interleave, all inline
- * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
- * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
- * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
- * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
- *
- * Macros imported by this code:
- * - enc_prepare	- setup NEON registers for encryption
- * - dec_prepare	- setup NEON registers for decryption
- * - enc_switch_key	- change to new key after having prepared for encryption
- * - encrypt_block	- encrypt a single block
- * - decrypt block	- decrypt a single block
- * - encrypt_block2x	- encrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - decrypt_block2x	- decrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - encrypt_block4x	- encrypt 4 blocks in parallel (if INTERLEAVE == 4)
- * - decrypt_block4x	- decrypt 4 blocks in parallel (if INTERLEAVE == 4)
- */
-
-#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
-#define FRAME_PUSH	stp x29, x30, [sp,#-16]! ; mov x29, sp
-#define FRAME_POP	ldp x29, x30, [sp],#16
-
-#if INTERLEAVE == 2
-
-aes_encrypt_block2x:
-	encrypt_block2x	v0, v1, w3, x2, x6, w7
-	ret
-ENDPROC(aes_encrypt_block2x)
-
-aes_decrypt_block2x:
-	decrypt_block2x	v0, v1, w3, x2, x6, w7
-	ret
-ENDPROC(aes_decrypt_block2x)
-
-#elif INTERLEAVE == 4
-
 aes_encrypt_block4x:
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_encrypt_block4x)

 aes_decrypt_block4x:
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
+	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
 	ret
 ENDPROC(aes_decrypt_block4x)

-#else
-#error INTERLEAVE should equal 2 or 4
-#endif
-
-	.macro		do_encrypt_block2x
-	bl		aes_encrypt_block2x
-	.endm
-
-	.macro		do_decrypt_block2x
-	bl		aes_decrypt_block2x
-	.endm
-
-	.macro		do_encrypt_block4x
-	bl		aes_encrypt_block4x
-	.endm
-
-	.macro		do_decrypt_block4x
-	bl		aes_decrypt_block4x
-	.endm
-
-#else
-#define FRAME_PUSH
-#define FRAME_POP
-
-	.macro		do_encrypt_block2x
-	encrypt_block2x	v0, v1, w3, x2, x6, w7
-	.endm
-
-	.macro		do_decrypt_block2x
-	decrypt_block2x	v0, v1, w3, x2, x6, w7
-	.endm
-
-	.macro		do_encrypt_block4x
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
-	.endm
-
-	.macro		do_decrypt_block4x
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x6, w7
-	.endm
-
-#endif
-
 	/*
 	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, int first)
+	 *		   int blocks)
 	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, int first)
+	 *		   int blocks)
 	 */

 AES_ENTRY(aes_ecb_encrypt)
-	FRAME_PUSH
-	cbz		w5, .LecbencloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp

 	enc_prepare	w3, x2, x5

 .LecbencloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lecbenc1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
-	do_encrypt_block2x
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LecbencloopNx
 .Lecbenc1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lecbencout
-#endif
 .Lecbencloop:
 	ld1		{v0.16b}, [x1], #16		/* get next pt block */
 	encrypt_block	v0, w3, x2, x5, w6
@@ -141,35 +53,27 @@ AES_ENTRY(aes_ecb_encrypt)
 	subs		w4, w4, #1
 	bne		.Lecbencloop
 .Lecbencout:
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_encrypt)


 AES_ENTRY(aes_ecb_decrypt)
-	FRAME_PUSH
-	cbz		w5, .LecbdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp

 	dec_prepare	w3, x2, x5

 .LecbdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lecbdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	do_decrypt_block2x
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LecbdecloopNx
 .Lecbdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lecbdecout
-#endif
 .Lecbdecloop:
 	ld1		{v0.16b}, [x1], #16		/* get next ct block */
 	decrypt_block	v0, w3, x2, x5, w6
@@ -177,62 +81,68 @@ AES_ENTRY(aes_ecb_decrypt)
 	subs		w4, w4, #1
 	bne		.Lecbdecloop
 .Lecbdecout:
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_ecb_decrypt)


 	/*
 	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 iv[], int first)
+	 *		   int blocks, u8 iv[])
 	 */

 AES_ENTRY(aes_cbc_encrypt)
-	cbz		w6, .Lcbcencloop
-
-	ld1		{v0.16b}, [x5]			/* get iv */
+	ld1		{v4.16b}, [x5]			/* get iv */
 	enc_prepare	w3, x2, x6

-.Lcbcencloop:
-	ld1		{v1.16b}, [x1], #16		/* get next pt block */
-	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with iv */
+.Lcbcencloop4x:
+	subs		w4, w4, #4
+	bmi		.Lcbcenc1x
+	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
 	encrypt_block	v0, w3, x2, x6, w7
-	st1		{v0.16b}, [x0], #16
+	eor		v1.16b, v1.16b, v0.16b
+	encrypt_block	v1, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v1.16b
+	encrypt_block	v2, w3, x2, x6, w7
+	eor		v3.16b, v3.16b, v2.16b
+	encrypt_block	v3, w3, x2, x6, w7
+	st1		{v0.16b-v3.16b}, [x0], #64
+	mov		v4.16b, v3.16b
+	b		.Lcbcencloop4x
+.Lcbcenc1x:
+	adds		w4, w4, #4
+	beq		.Lcbcencout
+.Lcbcencloop:
+	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
+	encrypt_block	v4, w3, x2, x6, w7
+	st1		{v4.16b}, [x0], #16
 	subs		w4, w4, #1
 	bne		.Lcbcencloop
-	st1		{v0.16b}, [x5]			/* return iv */
+.Lcbcencout:
+	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)


 AES_ENTRY(aes_cbc_decrypt)
-	FRAME_PUSH
-	cbz		w6, .LcbcdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp

 	ld1		{v7.16b}, [x5]			/* get iv */
 	dec_prepare	w3, x2, x6

 .LcbcdecloopNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lcbcdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	mov		v2.16b, v0.16b
-	mov		v3.16b, v1.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v7.16b
-	eor		v1.16b, v1.16b, v2.16b
-	mov		v7.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	sub		x1, x1, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
@@ -240,12 +150,10 @@ AES_ENTRY(aes_cbc_decrypt)
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-#endif
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lcbcdecout
-#endif
 .Lcbcdecloop:
 	ld1		{v1.16b}, [x1], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
@@ -256,49 +164,33 @@ AES_ENTRY(aes_cbc_decrypt)
 	subs		w4, w4, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	FRAME_POP
 	st1		{v7.16b}, [x5]			/* return iv */
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_cbc_decrypt)


 	/*
 	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		   int blocks, u8 ctr[], int first)
+	 *		   int blocks, u8 ctr[])
 	 */

 AES_ENTRY(aes_ctr_encrypt)
-	FRAME_PUSH
-	cbz		w6, .Lctrnotfirst	/* 1st time around? */
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+
 	enc_prepare	w3, x2, x6
 	ld1		{v4.16b}, [x5]

-.Lctrnotfirst:
-	umov		x8, v4.d[1]		/* keep swabbed ctr in reg */
-	rev		x8, x8
-#if INTERLEAVE >= 2
-	cmn		w8, w4			/* 32 bit overflow? */
+	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
+	rev		x6, x6
+	cmn		w6, w4			/* 32 bit overflow? */
 	bcs		.Lctrloop
 .LctrloopNx:
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lctr1x
-#if INTERLEAVE == 2
-	mov		v0.8b, v4.8b
-	mov		v1.8b, v4.8b
-	rev		x7, x8
-	add		x8, x8, #1
-	ins		v0.d[1], x7
-	rev		x7, x8
-	add		x8, x8, #1
-	ins		v1.d[1], x7
-	ld1		{v2.16b-v3.16b}, [x1], #32	/* get 2 input blocks */
-	do_encrypt_block2x
-	eor		v0.16b, v0.16b, v2.16b
-	eor		v1.16b, v1.16b, v3.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-#else
 	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
-	dup		v7.4s, w8
+	dup		v7.4s, w6
 	mov		v0.16b, v4.16b
 	add		v7.4s, v7.4s, v8.4s
 	mov		v1.16b, v4.16b
@@ -309,29 +201,27 @@ AES_ENTRY(aes_ctr_encrypt)
 	mov		v2.s[3], v8.s[1]
 	mov		v3.s[3], v8.s[2]
 	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
 	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
-	add		x8, x8, #INTERLEAVE
-#endif
-	rev		x7, x8
+	add		x6, x6, #4
+	rev		x7, x6
 	ins		v4.d[1], x7
 	cbz		w4, .Lctrout
 	b		.LctrloopNx
 .Lctr1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lctrout
-#endif
 .Lctrloop:
 	mov		v0.16b, v4.16b
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w3, x2, x8, w7

-	adds		x8, x8, #1		/* increment BE ctr */
-	rev		x7, x8
+	adds		x6, x6, #1		/* increment BE ctr */
+	rev		x7, x6
 	ins		v4.d[1], x7
 	bcs		.Lctrcarry		/* overflow? */

@@ -345,12 +235,12 @@ AES_ENTRY(aes_ctr_encrypt)

 .Lctrout:
 	st1		{v4.16b}, [x5]		/* return next CTR value */
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret

 .Lctrtailblock:
 	st1		{v0.16b}, [x0]
-	FRAME_POP
+	ldp		x29, x30, [sp], #16
 	ret

 .Lctrcarry:
@@ -384,39 +274,26 @@ CPU_LE(	.quad		1, 0x87		)
 CPU_BE(	.quad		0x87, 1		)

 AES_ENTRY(aes_xts_encrypt)
-	FRAME_PUSH
-	cbz		w7, .LxtsencloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp

 	ld1		{v4.16b}, [x6]
-	enc_prepare	w3, x5, x6
-	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
-	enc_switch_key	w3, x2, x6
+	cbz		w7, .Lxtsencnotfirst
+
+	enc_prepare	w3, x5, x8
+	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+	enc_switch_key	w3, x2, x8
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsencNx

+.Lxtsencnotfirst:
+	enc_prepare	w3, x2, x8
 .LxtsencloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsencNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lxtsenc1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 pt blocks */
-	next_tweak	v5, v4, v7, v8
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	do_encrypt_block2x
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-	cbz		w4, .LxtsencoutNx
-	next_tweak	v4, v5, v7, v8
-	b		.LxtsencNx
-.LxtsencoutNx:
-	mov		v4.16b, v5.16b
-	b		.Lxtsencout
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
@@ -425,7 +302,7 @@ AES_ENTRY(aes_xts_encrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	next_tweak	v7, v6, v7, v8
 	eor		v3.16b, v3.16b, v7.16b
-	do_encrypt_block4x
+	bl		aes_encrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
@@ -434,15 +311,13 @@ AES_ENTRY(aes_xts_encrypt)
 	mov		v4.16b, v7.16b
 	cbz		w4, .Lxtsencout
 	b		.LxtsencloopNx
-#endif
 .Lxtsenc1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lxtsencout
-#endif
 .Lxtsencloop:
 	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
@@ -450,45 +325,33 @@ AES_ENTRY(aes_xts_encrypt)
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsencloop
 .Lxtsencout:
-	FRAME_POP
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_encrypt)


 AES_ENTRY(aes_xts_decrypt)
-	FRAME_PUSH
-	cbz		w7, .LxtsdecloopNx
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp

 	ld1		{v4.16b}, [x6]
-	enc_prepare	w3, x5, x6
-	encrypt_block	v4, w3, x5, x6, w7		/* first tweak */
-	dec_prepare	w3, x2, x6
+	cbz		w7, .Lxtsdecnotfirst
+
+	enc_prepare	w3, x5, x8
+	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+	dec_prepare	w3, x2, x8
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsdecNx

+.Lxtsdecnotfirst:
+	dec_prepare	w3, x2, x8
 .LxtsdecloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsdecNx:
-#if INTERLEAVE >= 2
-	subs		w4, w4, #INTERLEAVE
+	subs		w4, w4, #4
 	bmi		.Lxtsdec1x
-#if INTERLEAVE == 2
-	ld1		{v0.16b-v1.16b}, [x1], #32	/* get 2 ct blocks */
-	next_tweak	v5, v4, v7, v8
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	do_decrypt_block2x
-	eor		v0.16b, v0.16b, v4.16b
-	eor		v1.16b, v1.16b, v5.16b
-	st1		{v0.16b-v1.16b}, [x0], #32
-	cbz		w4, .LxtsdecoutNx
-	next_tweak	v4, v5, v7, v8
-	b		.LxtsdecNx
-.LxtsdecoutNx:
-	mov		v4.16b, v5.16b
-	b		.Lxtsdecout
-#else
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
@@ -497,7 +360,7 @@ AES_ENTRY(aes_xts_decrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	next_tweak	v7, v6, v7, v8
 	eor		v3.16b, v3.16b, v7.16b
-	do_decrypt_block4x
+	bl		aes_decrypt_block4x
 	eor		v3.16b, v3.16b, v7.16b
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
@@ -506,15 +369,13 @@ AES_ENTRY(aes_xts_decrypt)
 	mov		v4.16b, v7.16b
 	cbz		w4, .Lxtsdecout
 	b		.LxtsdecloopNx
-#endif
 .Lxtsdec1x:
-	adds		w4, w4, #INTERLEAVE
+	adds		w4, w4, #4
 	beq		.Lxtsdecout
-#endif
 .Lxtsdecloop:
 	ld1		{v1.16b}, [x1], #16
 	eor		v0.16b, v1.16b, v4.16b
-	decrypt_block	v0, w3, x2, x6, w7
+	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
 	subs		w4, w4, #1
@@ -522,7 +383,8 @@ AES_ENTRY(aes_xts_decrypt)
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
-	FRAME_POP
+	st1		{v4.16b}, [x6]
+	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_xts_decrypt)

@@ -533,8 +395,28 @@ AES_ENDPROC(aes_xts_decrypt)
 AES_ENTRY(aes_mac_update)
 	ld1		{v0.16b}, [x4]			/* get dg */
 	enc_prepare	w2, x1, x7
-	cbnz		w5, .Lmacenc
+	cbz		w5, .Lmacloop4x
+
+	encrypt_block	v0, w2, x1, x7, w8

+.Lmacloop4x:
+	subs		w3, w3, #4
+	bmi		.Lmac1x
+	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
+	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v2.16b
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v3.16b
+	encrypt_block	v0, w2, x1, x7, w8
+	eor		v0.16b, v0.16b, v4.16b
+	cmp		w3, wzr
+	csinv		x5, x6, xzr, eq
+	cbz		w5, .Lmacout
+	encrypt_block	v0, w2, x1, x7, w8
+	b		.Lmacloop4x
+.Lmac1x:
+	add		w3, w3, #4
 .Lmacloop:
 	cbz		w3, .Lmacout
 	ld1		{v1.16b}, [x0], #16		/* get next pt block */
@@ -544,7 +426,6 @@ AES_ENTRY(aes_mac_update)
 	csinv		x5, x6, xzr, eq
 	cbz		w5, .Lmacout

-.Lmacenc:
 	encrypt_block	v0, w2, x1, x7, w8
 	b		.Lmacloop


+ 22 - 26
arch/arm64/crypto/aes-neonbs-glue.c
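Note (not part of the diff): the bit-sliced routines below operate on walk.stride-sized groups of blocks, so a chunk that is not the final one is trimmed to a multiple of that stride before the NEON call. A worked example with hypothetical numbers, assuming walk.stride is 8 AES blocks for this driver:

unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;	/* e.g. 13 */

if (walk.nbytes < walk.total)	/* not the last chunk of the request */
	blocks = round_down(blocks, walk.stride / AES_BLOCK_SIZE);	/* 13 -> 8 */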

@@ -46,10 +46,9 @@ asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],

 /* borrowed from aes-neon-blk.ko */
 asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				     int rounds, int blocks, int first);
+				     int rounds, int blocks);
 asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
-				     int rounds, int blocks, u8 iv[],
-				     int first);
+				     int rounds, int blocks, u8 iv[]);

 struct aesbs_ctx {
 	u8	rk[13 * (8 * AES_BLOCK_SIZE) + 32];
@@ -100,9 +99,8 @@ static int __ecb_crypt(struct skcipher_request *req,
 	struct skcipher_walk walk;
 	int err;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

@@ -110,12 +108,13 @@ static int __ecb_crypt(struct skcipher_request *req,
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);

+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
 		   ctx->rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();

 	return err;
 }
@@ -157,22 +156,21 @@ static int cbc_encrypt(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	int err, first = 1;
+	int err;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

 		/* fall back to the non-bitsliced NEON implementation */
+		kernel_neon_begin();
 		neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				     ctx->enc, ctx->key.rounds, blocks, walk.iv,
-				     first);
+				     ctx->enc, ctx->key.rounds, blocks,
+				     walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-		first = 0;
 	}
-	kernel_neon_end();
 	return err;
 }

@@ -183,9 +181,8 @@ static int cbc_decrypt(struct skcipher_request *req)
 	struct skcipher_walk walk;
 	int err;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

@@ -193,13 +190,14 @@ static int cbc_decrypt(struct skcipher_request *req)
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);

+		kernel_neon_begin();
 		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->key.rk, ctx->key.rounds, blocks,
 				  walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();

 	return err;
 }
@@ -231,9 +229,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 	u8 buf[AES_BLOCK_SIZE];
 	int err;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -244,8 +241,10 @@ static int ctr_encrypt(struct skcipher_request *req)
 			final = NULL;
 		}

+		kernel_neon_begin();
 		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+		kernel_neon_end();

 		if (final) {
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -260,8 +259,6 @@ static int ctr_encrypt(struct skcipher_request *req)
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
-
 	return err;
 }

@@ -306,12 +303,11 @@ static int __xts_crypt(struct skcipher_request *req,
 	struct skcipher_walk walk;
 	int err;

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

 	kernel_neon_begin();
-
-	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
-			     ctx->key.rounds, 1, 1);
+	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1);
+	kernel_neon_end();

 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -320,13 +316,13 @@ static int __xts_crypt(struct skcipher_request *req,
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);

+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
 		   ctx->key.rounds, blocks, walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
-
 	return err;
 }


+ 9 - 3
arch/arm64/crypto/chacha20-neon-glue.c
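Note (not part of the diff): chacha20_doneon() below splits its input into 4-block NEON passes, single-block passes, and a buffered tail. A worked example of the arithmetic for a hypothetical 1000-byte request:

unsigned int bytes = 1000;
unsigned int by4  = rounddown(bytes, 4 * CHACHA20_BLOCK_SIZE);		/* 768 */
unsigned int by1  = rounddown(bytes - by4, CHACHA20_BLOCK_SIZE);	/* 192 */
unsigned int tail = bytes - by4 - by1;		/* 40 bytes, bounced through buf[] */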

@@ -37,12 +37,19 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 	u8 buf[CHACHA20_BLOCK_SIZE];

 	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+		kernel_neon_begin();
 		chacha20_4block_xor_neon(state, dst, src);
+		kernel_neon_end();
 		bytes -= CHACHA20_BLOCK_SIZE * 4;
 		src += CHACHA20_BLOCK_SIZE * 4;
 		dst += CHACHA20_BLOCK_SIZE * 4;
 		state[12] += 4;
 	}
+
+	if (!bytes)
+		return;
+
+	kernel_neon_begin();
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block_xor_neon(state, dst, src);
 		bytes -= CHACHA20_BLOCK_SIZE;
@@ -55,6 +62,7 @@ static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
 		chacha20_block_xor_neon(state, buf, buf);
 		memcpy(dst, buf, bytes);
 	}
+	kernel_neon_end();
 }

 static int chacha20_neon(struct skcipher_request *req)
@@ -68,11 +76,10 @@ static int chacha20_neon(struct skcipher_request *req)
 	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);

-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);

 	crypto_chacha20_init(state, ctx, walk.iv);

-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;

@@ -83,7 +90,6 @@ static int chacha20_neon(struct skcipher_request *req)
 				nbytes);
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
-	kernel_neon_end();

 	return err;
 }

+ 23 - 13
arch/arm64/crypto/sha256-glue.c
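Note (not part of the diff): on CONFIG_PREEMPT kernels, sha256_update_neon() below bounds each kernel_neon_begin()/kernel_neon_end() region to roughly one block of input. A worked example of the chunk computation with hypothetical sizes:

unsigned int buffered = sctx->count % SHA256_BLOCK_SIZE;	/* e.g. 20 */
unsigned int len = 1000, chunk = len;

if (IS_ENABLED(CONFIG_PREEMPT) && chunk + buffered > SHA256_BLOCK_SIZE)
	chunk = SHA256_BLOCK_SIZE - buffered;	/* first pass: 44 bytes */
/* later passes start block-aligned, so chunk becomes 64 bytes each time */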

@@ -89,21 +89,32 @@ static struct shash_alg algs[] = { {
 static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
 			      unsigned int len)
 {
-	/*
-	 * Stacking and unstacking a substantial slice of the NEON register
-	 * file may significantly affect performance for small updates when
-	 * executing in interrupt context, so fall back to the scalar code
-	 * in that case.
-	 */
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
 	if (!may_use_simd())
 		return sha256_base_do_update(desc, data, len,
 				(sha256_block_fn *)sha256_block_data_order);

-	kernel_neon_begin();
-	sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_neon);
-	kernel_neon_end();
+	while (len > 0) {
+		unsigned int chunk = len;
+
+		/*
+		 * Don't hog the CPU for the entire time it takes to process all
+		 * input when running on a preemptible kernel, but process the
+		 * data block by block instead.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT) &&
+		    chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
+			chunk = SHA256_BLOCK_SIZE -
+				sctx->count % SHA256_BLOCK_SIZE;

+		kernel_neon_begin();
+		sha256_base_do_update(desc, data, chunk,
+				      (sha256_block_fn *)sha256_block_neon);
+		kernel_neon_end();
+		data += chunk;
+		len -= chunk;
+	}
 	return 0;
 }

@@ -117,10 +128,9 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
 		sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_data_order);
 	} else {
-		kernel_neon_begin();
 		if (len)
-			sha256_base_do_update(desc, data, len,
-				(sha256_block_fn *)sha256_block_neon);
+			sha256_update_neon(desc, data, len);
+		kernel_neon_begin();
 		sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_neon);
 		kernel_neon_end();

+ 352 - 0
arch/arm64/crypto/speck-neon-core.S
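Note (not part of the diff): a scalar reference for one Speck128 round, matching the per-lane operations that _speck_round_128bytes performs below. The helper name here is hypothetical; the generic implementation (crypto_speck128_encrypt() and friends) is provided by the Speck module this file accelerates.

static void speck128_round(u64 *x, u64 *y, u64 k)
{
	*x = ror64(*x, 8);	/* x = ror(x, 8) */
	*x += *y;		/* x += y        */
	*x ^= k;		/* x ^= k        */
	*y = rol64(*y, 3);	/* y = rol(y, 3) */
	*y ^= *x;		/* y ^= x        */
}

The decryption round in _speck_unround_128bytes simply undoes these steps in reverse order.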

@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+	.text
+
+	// arguments
+	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
+	NROUNDS		.req	w1	// int nrounds
+	NROUNDS_X	.req	x1
+	DST		.req	x2	// void *dst
+	SRC		.req	x3	// const void *src
+	NBYTES		.req	w4	// unsigned int nbytes
+	TWEAK		.req	x5	// void *tweak
+
+	// registers which hold the data being encrypted/decrypted
+	// (underscores avoid a naming collision with ARM64 registers x0-x3)
+	X_0		.req	v0
+	Y_0		.req	v1
+	X_1		.req	v2
+	Y_1		.req	v3
+	X_2		.req	v4
+	Y_2		.req	v5
+	X_3		.req	v6
+	Y_3		.req	v7
+
+	// the round key, duplicated in all lanes
+	ROUND_KEY	.req	v8
+
+	// index vector for tbl-based 8-bit rotates
+	ROTATE_TABLE	.req	v9
+	ROTATE_TABLE_Q	.req	q9
+
+	// temporary registers
+	TMP0		.req	v10
+	TMP1		.req	v11
+	TMP2		.req	v12
+	TMP3		.req	v13
+
+	// multiplication table for updating XTS tweaks
+	GFMUL_TABLE	.req	v14
+	GFMUL_TABLE_Q	.req	q14
+
+	// next XTS tweak value(s)
+	TWEAKV_NEXT	.req	v15
+
+	// XTS tweaks for the blocks currently being encrypted/decrypted
+	TWEAKV0		.req	v16
+	TWEAKV1		.req	v17
+	TWEAKV2		.req	v18
+	TWEAKV3		.req	v19
+	TWEAKV4		.req	v20
+	TWEAKV5		.req	v21
+	TWEAKV6		.req	v22
+	TWEAKV7		.req	v23
+
+	.align		4
+.Lror64_8_table:
+	.octa		0x080f0e0d0c0b0a090007060504030201
+.Lror32_8_table:
+	.octa		0x0c0f0e0d080b0a090407060500030201
+.Lrol64_8_table:
+	.octa		0x0e0d0c0b0a09080f0605040302010007
+.Lrol32_8_table:
+	.octa		0x0e0d0c0f0a09080b0605040702010003
+.Lgf128mul_table:
+	.octa		0x00000000000000870000000000000001
+.Lgf64mul_table:
+	.octa		0x0000000000000000000000002d361b00
+
+/*
+ * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
+ *
+ * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
+ * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
+ * of ROUND_KEY.  'n' is the lane size: 64 for Speck128, or 32 for Speck64.
+ * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
+ */
+.macro _speck_round_128bytes	n, lanes
+
+	// x = ror(x, 8)
+	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
+	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
+	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
+	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
+
+	// x += y
+	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
+	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
+	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
+	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes
+
+	// x ^= k
+	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
+	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
+	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
+	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
+
+	// y = rol(y, 3)
+	shl		TMP0.\lanes, Y_0.\lanes, #3
+	shl		TMP1.\lanes, Y_1.\lanes, #3
+	shl		TMP2.\lanes, Y_2.\lanes, #3
+	shl		TMP3.\lanes, Y_3.\lanes, #3
+	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
+	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
+	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
+	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)
+
+	// y ^= x
+	eor		Y_0.16b, TMP0.16b, X_0.16b
+	eor		Y_1.16b, TMP1.16b, X_1.16b
+	eor		Y_2.16b, TMP2.16b, X_2.16b
+	eor		Y_3.16b, TMP3.16b, X_3.16b
+.endm
+
+/*
+ * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
+ *
+ * This is the inverse of _speck_round_128bytes().
+ */
+.macro _speck_unround_128bytes	n, lanes
+
+	// y ^= x
+	eor		TMP0.16b, Y_0.16b, X_0.16b
+	eor		TMP1.16b, Y_1.16b, X_1.16b
+	eor		TMP2.16b, Y_2.16b, X_2.16b
+	eor		TMP3.16b, Y_3.16b, X_3.16b
+
+	// y = ror(y, 3)
+	ushr		Y_0.\lanes, TMP0.\lanes, #3
+	ushr		Y_1.\lanes, TMP1.\lanes, #3
+	ushr		Y_2.\lanes, TMP2.\lanes, #3
+	ushr		Y_3.\lanes, TMP3.\lanes, #3
+	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
+	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
+	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
+	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)
+
+	// x ^= k
+	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
+	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
+	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
+	eor		X_3.16b, X_3.16b, ROUND_KEY.16b
+
+	// x -= y
+	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
+	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
+	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
+	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes
+
+	// x = rol(x, 8)
+	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
+	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
+	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
+	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
+.endm
+
+.macro _next_xts_tweak	next, cur, tmp, n
+.if \n == 64
+	/*
+	 * Calculate the next tweak by multiplying the current one by x,
+	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
+	 */
+	sshr		\tmp\().2d, \cur\().2d, #63
+	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
+	shl		\next\().2d, \cur\().2d, #1
+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
+	eor		\next\().16b, \next\().16b, \tmp\().16b
+.else
+	/*
+	 * Calculate the next two tweaks by multiplying the current ones by x^2,
+	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
+	 */
+	ushr		\tmp\().2d, \cur\().2d, #62
+	shl		\next\().2d, \cur\().2d, #2
+	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
+	eor		\next\().16b, \next\().16b, \tmp\().16b
+.endif
+.endm
+
+/*
+ * _speck_xts_crypt() - Speck-XTS encryption/decryption
+ *
+ * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
+ * using Speck-XTS, specifically the variant with a block size of '2n' and round
+ * count given by NROUNDS.  The expanded round keys are given in ROUND_KEYS, and
+ * the current XTS tweak value is given in TWEAK.  It's assumed that NBYTES is a
+ * nonzero multiple of 128.
+ */
+.macro _speck_xts_crypt	n, lanes, decrypting
+
+	/*
+	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
+	 * round key rather than the first, since for decryption the round keys
+	 * are used in reverse order.
+	 */
+.if \decrypting
+	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
+.if \n == 64
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
+	sub		ROUND_KEYS, ROUND_KEYS, #8
+.else
+	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
+	sub		ROUND_KEYS, ROUND_KEYS, #4
+.endif
+.endif
+
+	// Load the index vector for tbl-based 8-bit rotates
+.if \decrypting
+	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
+.else
+	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
+.endif
+
+	// One-time XTS preparation
+.if \n == 64
+	// Load first tweak
+	ld1		{TWEAKV0.16b}, [TWEAK]
+
+	// Load GF(2^128) multiplication table
+	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
+.else
+	// Load first tweak
+	ld1		{TWEAKV0.8b}, [TWEAK]
+
+	// Load GF(2^64) multiplication table
+	ldr		GFMUL_TABLE_Q, .Lgf64mul_table
+
+	// Calculate second tweak, packing it together with the first
+	ushr		TMP0.2d, TWEAKV0.2d, #63
+	shl		TMP1.2d, TWEAKV0.2d, #1
+	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
+	eor		TMP0.8b, TMP0.8b, TMP1.8b
+	mov		TWEAKV0.d[1], TMP0.d[0]
+.endif
+
+.Lnext_128bytes_\@:
+
+	// Calculate XTS tweaks for next 128 bytes
+	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
+	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
+	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
+	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
+	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
+	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
+	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
+	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n
+
+	// Load the next source blocks into {X,Y}[0-3]
+	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
+	ld1		{X_2.16b-Y_3.16b}, [SRC], #64
+
+	// XOR the source blocks with their XTS tweaks
+	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
+	eor		Y_0.16b,  Y_0.16b, TWEAKV1.16b
+	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
+	eor		Y_1.16b,  Y_1.16b, TWEAKV3.16b
+	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
+	eor		Y_2.16b,  Y_2.16b, TWEAKV5.16b
+	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
+	eor		Y_3.16b,  Y_3.16b, TWEAKV7.16b
+
+	/*
+	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
+	 * that the X[0-3] registers contain only the second halves of blocks,
+	 * and the Y[0-3] registers contain only the first halves of blocks.
+	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
+	 */
+	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
+	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
+	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
+	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
+	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
+	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
+	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
+	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
+
+	// Do the cipher rounds
+	mov		x6, ROUND_KEYS
+	mov		w7, NROUNDS
+.Lnext_round_\@:
+.if \decrypting
+	ld1r		{ROUND_KEY.\lanes}, [x6]
+	sub		x6, x6, #( \n / 8 )
+	_speck_unround_128bytes	\n, \lanes
+.else
+	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
+	_speck_round_128bytes	\n, \lanes
+.endif
+	subs		w7, w7, #1
+	bne		.Lnext_round_\@
+
+	// Re-interleave the 'x' and 'y' elements of each block
+	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
+	zip2		Y_0.\lanes,  Y_0.\lanes, X_0.\lanes
+	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
+	zip2		Y_1.\lanes,  Y_1.\lanes, X_1.\lanes
+	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
+	zip2		Y_2.\lanes,  Y_2.\lanes, X_2.\lanes
+	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
+	zip2		Y_3.\lanes,  Y_3.\lanes, X_3.\lanes
+
+	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
+	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
+	eor		Y_0.16b, Y_0.16b,  TWEAKV1.16b
+	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
+	eor		Y_1.16b, Y_1.16b,  TWEAKV3.16b
+	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
+	eor		Y_2.16b, Y_2.16b,  TWEAKV5.16b
+	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
+	eor		Y_3.16b, Y_3.16b,  TWEAKV7.16b
+	mov		TWEAKV0.16b, TWEAKV_NEXT.16b
+
+	// Store the ciphertext in the destination buffer
+	st1		{X_0.16b-Y_1.16b}, [DST], #64
+	st1		{X_2.16b-Y_3.16b}, [DST], #64
+
+	// Continue if there are more 128-byte chunks remaining
+	subs		NBYTES, NBYTES, #128
+	bne		.Lnext_128bytes_\@
+
+	// Store the next tweak and return
+.if \n == 64
+	st1		{TWEAKV_NEXT.16b}, [TWEAK]
+.else
+	st1		{TWEAKV_NEXT.8b}, [TWEAK]
+.endif
+	ret
+.endm
+
+ENTRY(speck128_xts_encrypt_neon)
+	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
+ENDPROC(speck128_xts_encrypt_neon)
+
+ENTRY(speck128_xts_decrypt_neon)
+	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
+ENDPROC(speck128_xts_decrypt_neon)
+
+ENTRY(speck64_xts_encrypt_neon)
+	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
+ENDPROC(speck64_xts_encrypt_neon)
+
+ENTRY(speck64_xts_decrypt_neon)
+	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
+ENDPROC(speck64_xts_decrypt_neon)

+ 282 - 0
arch/arm64/crypto/speck-neon-glue.c
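Note (not part of the diff): the Speck64-XTS remainder loop below advances the 64-bit tweak by multiplying it by x in GF(2^64) with reduction polynomial x^64 + x^4 + x^3 + x + 1, i.e. reduction constant 0x1B. The same update as a standalone sketch, with a hypothetical helper name:

static __le64 speck64_xts_next_tweak(__le64 tweak)
{
	u64 t = le64_to_cpu(tweak);

	return cpu_to_le64((t << 1) ^ ((t & (1ULL << 63)) ? 0x1B : 0));
}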

@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
+ * (64-bit version; based on the 32-bit version)
+ *
+ * Copyright (c) 2018 Google, Inc
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/algapi.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/speck.h>
+#include <crypto/xts.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/* The assembly functions only handle multiples of 128 bytes */
+#define SPECK_NEON_CHUNK_SIZE	128
+
+/* Speck128 */
+
+struct speck128_xts_tfm_ctx {
+	struct speck128_tfm_ctx main_key;
+	struct speck128_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
+					  void *dst, const void *src,
+					  unsigned int nbytes, void *tweak);
+
+typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
+				     u8 *, const u8 *);
+typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
+					  const void *, unsigned int, void *);
+
+static __always_inline int
+__speck128_xts_crypt(struct skcipher_request *req,
+		     speck128_crypt_one_t crypt_one,
+		     speck128_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	le128 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
+			gf128mul_x_ble(&tweak, &tweak);
+
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck128_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
+				    speck128_xts_encrypt_neon);
+}
+
+static int speck128_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
+				    speck128_xts_decrypt_neon);
+}
+
+static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+/* Speck64 */
+
+struct speck64_xts_tfm_ctx {
+	struct speck64_tfm_ctx main_key;
+	struct speck64_tfm_ctx tweak_key;
+};
+
+asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
+					 void *dst, const void *src,
+					 unsigned int nbytes, void *tweak);
+
+typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
+				    u8 *, const u8 *);
+typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
+					 const void *, unsigned int, void *);
+
+static __always_inline int
+__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
+		    speck64_xts_crypt_many_t crypt_many)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	__le64 tweak;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
+
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
+		u8 *dst = walk.dst.virt.addr;
+		const u8 *src = walk.src.virt.addr;
+
+		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
+			unsigned int count;
+
+			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
+			kernel_neon_begin();
+			(*crypt_many)(ctx->main_key.round_keys,
+				      ctx->main_key.nrounds,
+				      dst, src, count, &tweak);
+			kernel_neon_end();
+			dst += count;
+			src += count;
+			nbytes -= count;
+		}
+
+		/* Handle any remainder with generic code */
+		while (nbytes >= sizeof(tweak)) {
+			*(__le64 *)dst = *(__le64 *)src ^ tweak;
+			(*crypt_one)(&ctx->main_key, dst, dst);
+			*(__le64 *)dst ^= tweak;
+			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
+					    ((tweak & cpu_to_le64(1ULL << 63)) ?
+					     0x1B : 0));
+			dst += sizeof(tweak);
+			src += sizeof(tweak);
+			nbytes -= sizeof(tweak);
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static int speck64_xts_encrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
+				   speck64_xts_encrypt_neon);
+}
+
+static int speck64_xts_decrypt(struct skcipher_request *req)
+{
+	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
+				   speck64_xts_decrypt_neon);
+}
+
+static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	keylen /= 2;
+
+	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
+	if (err)
+		return err;
+
+	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
+}
+
+static struct skcipher_alg speck_algs[] = {
+	{
+		.base.cra_name		= "xts(speck128)",
+		.base.cra_driver_name	= "xts-speck128-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
+		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
+		.ivsize			= SPECK128_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck128_xts_setkey,
+		.encrypt		= speck128_xts_encrypt,
+		.decrypt		= speck128_xts_decrypt,
+	}, {
+		.base.cra_name		= "xts(speck64)",
+		.base.cra_driver_name	= "xts-speck64-neon",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
+		.base.cra_alignmask	= 7,
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
+		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
+		.ivsize			= SPECK64_BLOCK_SIZE,
+		.walksize		= SPECK_NEON_CHUNK_SIZE,
+		.setkey			= speck64_xts_setkey,
+		.encrypt		= speck64_xts_encrypt,
+		.decrypt		= speck64_xts_decrypt,
+	}
+};
+
+static int __init speck_neon_module_init(void)
+{
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_neon_module_exit(void)
+{
+	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_neon_module_init);
+module_exit(speck_neon_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("xts(speck128)");
+MODULE_ALIAS_CRYPTO("xts-speck128-neon");
+MODULE_ALIAS_CRYPTO("xts(speck64)");
+MODULE_ALIAS_CRYPTO("xts-speck64-neon");

+ 693 - 721
arch/x86/crypto/aesni-intel_asm.S
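Note (not part of the diff): the AadHash/AadLen/... byte offsets introduced below describe the per-request gcm_context_data block that the macros reference through %arg2. An illustrative layout consistent with those offsets; field names follow the comments in GCM_INIT, and the exact struct definition lives on the C side in aesni-intel_glue.c:

struct gcm_context_data_sketch {
	u8  aad_hash[16];		/* AadHash,      16*0     */
	u64 aad_length;			/* AadLen,       16*1     */
	u64 in_length;			/* InLen,        16*1 + 8 */
	u8  partial_block_enc_key[16];	/* PBlockEncKey, 16*2     */
	u8  orig_iv[16];		/* OrigIV,       16*3     */
	u8  current_counter[16];	/* CurCount,     16*4     */
	u64 partial_block_len;		/* PBlockLen,    16*5     */
	u64 unused;
	u8  hash_keys[16 * 8];		/* HashKey .. HashKey_4_k, from 16*6 */
};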

@@ -94,23 +94,30 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff


 #define	STACK_OFFSET    8*3
-#define	HashKey		16*0	// store HashKey <<1 mod poly here
-#define	HashKey_2	16*1	// store HashKey^2 <<1 mod poly here
-#define	HashKey_3	16*2	// store HashKey^3 <<1 mod poly here
-#define	HashKey_4	16*3	// store HashKey^4 <<1 mod poly here
-#define	HashKey_k	16*4	// store XOR of High 64 bits and Low 64
+
+#define AadHash 16*0
+#define AadLen 16*1
+#define InLen (16*1)+8
+#define PBlockEncKey 16*2
+#define OrigIV 16*3
+#define CurCount 16*4
+#define PBlockLen 16*5
+#define	HashKey		16*6	// store HashKey <<1 mod poly here
+#define	HashKey_2	16*7	// store HashKey^2 <<1 mod poly here
+#define	HashKey_3	16*8	// store HashKey^3 <<1 mod poly here
+#define	HashKey_4	16*9	// store HashKey^4 <<1 mod poly here
+#define	HashKey_k	16*10	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey <<1 mod poly here
 				//(for Karatsuba purposes)
-#define	HashKey_2_k	16*5	// store XOR of High 64 bits and Low 64
+#define	HashKey_2_k	16*11	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^2 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	HashKey_3_k	16*6	// store XOR of High 64 bits and Low 64
+#define	HashKey_3_k	16*12	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^3 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	HashKey_4_k	16*7	// store XOR of High 64 bits and Low 64
+#define	HashKey_4_k	16*13	// store XOR of High 64 bits and Low 64
 				// bits of  HashKey^4 <<1 mod poly here
 				// (for Karatsuba purposes)
-#define	VARIABLE_OFFSET	16*8

 #define arg1 rdi
 #define arg2 rsi
@@ -118,10 +125,11 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define arg4 rcx
 #define arg5 r8
 #define arg6 r9
-#define arg7 STACK_OFFSET+8(%r14)
-#define arg8 STACK_OFFSET+16(%r14)
-#define arg9 STACK_OFFSET+24(%r14)
-#define arg10 STACK_OFFSET+32(%r14)
+#define arg7 STACK_OFFSET+8(%rsp)
+#define arg8 STACK_OFFSET+16(%rsp)
+#define arg9 STACK_OFFSET+24(%rsp)
+#define arg10 STACK_OFFSET+32(%rsp)
+#define arg11 STACK_OFFSET+40(%rsp)
 #define keysize 2*15*16(%arg1)
 #endif

@@ -171,6 +179,332 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 #define TKEYP	T1
 #endif

+.macro FUNC_SAVE
+	push	%r12
+	push	%r13
+	push	%r14
+#
+# states of %xmm registers %xmm6:%xmm15 not saved
+# all %xmm registers are clobbered
+#
+.endm
+
+
+.macro FUNC_RESTORE
+	pop	%r14
+	pop	%r13
+	pop	%r12
+.endm
+
+# Precompute hashkeys.
+# Input: Hash subkey.
+# Output: HashKeys stored in gcm_context_data.  Only needs to be called
+# once per key.
+# clobbers r12, and tmp xmm registers.
+.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
+	mov	\SUBKEY, %r12
+	movdqu	(%r12), \TMP3
+	movdqa	SHUF_MASK(%rip), \TMP2
+	PSHUFB_XMM \TMP2, \TMP3
+
+	# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
+
+	movdqa	\TMP3, \TMP2
+	psllq	$1, \TMP3
+	psrlq	$63, \TMP2
+	movdqa	\TMP2, \TMP1
+	pslldq	$8, \TMP2
+	psrldq	$8, \TMP1
+	por	\TMP2, \TMP3
+
+	# reduce HashKey<<1
+
+	pshufd	$0x24, \TMP1, \TMP2
+	pcmpeqd TWOONE(%rip), \TMP2
+	pand	POLY(%rip), \TMP2
+	pxor	\TMP2, \TMP3
+	movdqa	\TMP3, HashKey(%arg2)
+
+	movdqa	   \TMP3, \TMP5
+	pshufd	   $78, \TMP3, \TMP1
+	pxor	   \TMP3, \TMP1
+	movdqa	   \TMP1, HashKey_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_2(%arg2)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_2_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_3(%arg2)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_3_k(%arg2)
+
+	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+	movdqa	   \TMP5, HashKey_4(%arg2)
+	pshufd	   $78, \TMP5, \TMP1
+	pxor	   \TMP5, \TMP1
+	movdqa	   \TMP1, HashKey_4_k(%arg2)
+.endm
+
+# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
+# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
+.macro GCM_INIT Iv SUBKEY AAD AADLEN
+	mov \AADLEN, %r11
+	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
+	xor %r11, %r11
+	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
+	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
+	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
+	mov \Iv, %rax
+	movdqu (%rax), %xmm0
+	movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
+
+	movdqa  SHUF_MASK(%rip), %xmm2
+	PSHUFB_XMM %xmm2, %xmm0
+	movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
+
+	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+	movdqa HashKey(%arg2), %xmm13
+
+	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
+	%xmm4, %xmm5, %xmm6
+.endm
+
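GCM_INIT captures everything the later update/finalize calls need in the gcm_context_data block the caller passes in %arg2. A rough orientation sketch only; the field names and layout below are inferred from the offsets the assembly uses, not copied from the glue code:

#include <linux/types.h>

/* illustrative; the real struct gcm_context_data lives in the glue code */
struct gcm_context_data_sketch {
	u8  aad_hash[16];              /* AadHash: running GHASH state       */
	u64 aad_length;                /* AadLen                              */
	u64 in_length;                 /* InLen: bytes encrypted/decrypted    */
	u8  partial_block_enc_key[16]; /* PBlockEncKey: E(K, Yn) in progress  */
	u8  orig_iv[16];               /* OrigIV: Y0, used again for the tag  */
	u8  current_counter[16];       /* CurCount: next counter block        */
	u64 partial_block_len;         /* PBlockLen: bytes of Yn already used */
	u64 unused;
	u8  hash_keys[16 * 8];         /* HashKey .. HashKey_4_k              */
};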
+# GCM_ENC_DEC Encrypts/Decrypts given data. Assumes that the passed gcm_context
+# struct has been initialized by GCM_INIT.
+# Requires the input data to be at least 1 byte long because of READ_PARTIAL_BLOCK
+# Clobbers rax, r10-r13, and xmm0-xmm15
+.macro GCM_ENC_DEC operation
+	movdqu AadHash(%arg2), %xmm8
+	movdqu HashKey(%arg2), %xmm13
+	add %arg5, InLen(%arg2)
+
+	xor %r11, %r11 # initialise the data pointer offset as zero
+	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
+
+	sub %r11, %arg5		# sub partial block data used
+	mov %arg5, %r13		# save the number of bytes
+
+	and $-16, %r13		# %r13 = %r13 - (%r13 mod 16)
+	mov %r13, %r12
+	# Encrypt/Decrypt first few blocks
+
+	and	$(3<<4), %r12
+	jz	_initial_num_blocks_is_0_\@
+	cmp	$(2<<4), %r12
+	jb	_initial_num_blocks_is_1_\@
+	je	_initial_num_blocks_is_2_\@
+_initial_num_blocks_is_3_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
+	sub	$48, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_2_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
+	sub	$32, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_1_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
+	sub	$16, %r13
+	jmp	_initial_blocks_\@
+_initial_num_blocks_is_0_\@:
+	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
+_initial_blocks_\@:
+
+	# Main loop - Encrypt/Decrypt remaining blocks
+
+	cmp	$0, %r13
+	je	_zero_cipher_left_\@
+	sub	$64, %r13
+	je	_four_cipher_left_\@
+_crypt_by_4_\@:
+	GHASH_4_ENCRYPT_4_PARALLEL_\operation	%xmm9, %xmm10, %xmm11, %xmm12, \
+	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
+	%xmm7, %xmm8, enc
+	add	$64, %r11
+	sub	$64, %r13
+	jne	_crypt_by_4_\@
+_four_cipher_left_\@:
+	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_\@:
+	movdqu %xmm8, AadHash(%arg2)
+	movdqu %xmm0, CurCount(%arg2)
+
+	mov	%arg5, %r13
+	and	$15, %r13			# %r13 = arg5 (mod 16)
+	je	_multiple_of_16_bytes_\@
+
+	mov %r13, PBlockLen(%arg2)
+
+	# Handle the last <16 Byte block separately
+	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
+	movdqu %xmm0, CurCount(%arg2)
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm0
+
+	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
+	movdqu %xmm0, PBlockEncKey(%arg2)
+
+	cmp	$16, %arg5
+	jge _large_enough_update_\@
+
+	lea (%arg4,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+	jmp _data_read_\@
+
+_large_enough_update_\@:
+	sub	$16, %r11
+	add	%r13, %r11
+
+	# receive the last <16 Byte block
+	movdqu	(%arg4, %r11, 1), %xmm1
+
+	sub	%r13, %r11
+	add	$16, %r11
+
+	lea	SHIFT_MASK+16(%rip), %r12
+	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
+	# (r13 is the number of bytes in plaintext mod 16)
+	sub	%r13, %r12
+	# get the appropriate shuffle mask
+	movdqu	(%r12), %xmm2
+	# shift right 16-r13 bytes
+	PSHUFB_XMM  %xmm2, %xmm1
+
+_data_read_\@:
+	lea ALL_F+16(%rip), %r12
+	sub %r13, %r12
+
+.ifc \operation, dec
+	movdqa  %xmm1, %xmm2
+.endif
+	pxor	%xmm1, %xmm0            # XOR Encrypt(K, Yn)
+	movdqu	(%r12), %xmm1
+	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
+	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
+.ifc \operation, dec
+	pand    %xmm1, %xmm2
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10 ,%xmm2
+
+	pxor %xmm2, %xmm8
+.else
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10,%xmm0
+
+	pxor	%xmm0, %xmm8
+.endif
+
+	movdqu %xmm8, AadHash(%arg2)
+.ifc \operation, enc
+	# GHASH computation for the last <16 byte block
+	movdqa SHUF_MASK(%rip), %xmm10
+	# shuffle xmm0 back to output as ciphertext
+	PSHUFB_XMM %xmm10, %xmm0
+.endif
+
+	# Output %r13 bytes
+	MOVQ_R64_XMM %xmm0, %rax
+	cmp $8, %r13
+	jle _less_than_8_bytes_left_\@
+	mov %rax, (%arg3 , %r11, 1)
+	add $8, %r11
+	psrldq $8, %xmm0
+	MOVQ_R64_XMM %xmm0, %rax
+	sub $8, %r13
+_less_than_8_bytes_left_\@:
+	mov %al,  (%arg3, %r11, 1)
+	add $1, %r11
+	shr $8, %rax
+	sub $1, %r13
+	jne _less_than_8_bytes_left_\@
+_multiple_of_16_bytes_\@:
+.endm
+
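GCM_ENC_DEC schedules the bulk work in three stages: 0-3 "initial" blocks to reach a 4-block boundary, a 4-blocks-per-iteration main loop, and a sub-16-byte tail that is parked for the next update call. A hedged outline of that dispatch (the real work is in the macros it invokes):

#include <stddef.h>

static void gcm_update_outline(size_t len, size_t partial_consumed)
{
	size_t remaining = len - partial_consumed;   /* after PARTIAL_BLOCK   */
	size_t full = remaining & ~(size_t)15;       /* whole 16-byte blocks  */
	size_t initial = (full >> 4) & 3;            /* the %r12 & (3<<4) test */

	/* INITIAL_BLOCKS_ENC_DEC: peel off `initial` blocks */
	full -= initial * 16;

	/* GHASH_4_ENCRYPT_4_PARALLEL_*: 4 blocks per iteration; the last
	 * four are folded into the hash by GHASH_LAST_4 */
	while (full)
		full -= 64;

	/* remaining & 15 trailing bytes: encrypt one more counter block,
	 * mask it, and record PBlockLen/PBlockEncKey for the next call */
}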
+# GCM_COMPLETE Finishes the tag update for the last partial block
+# Output: Authentication Tag (AUTH_TAG)
+# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
+.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
+	movdqu AadHash(%arg2), %xmm8
+	movdqu HashKey(%arg2), %xmm13
+
+	mov PBlockLen(%arg2), %r12
+
+	cmp $0, %r12
+	je _partial_done\@
+
+	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+
+_partial_done\@:
+	mov AadLen(%arg2), %r12  # %r12 = aadLen (number of bytes)
+	shl	$3, %r12		  # convert into number of bits
+	movd	%r12d, %xmm15		  # len(A) in %xmm15
+	mov InLen(%arg2), %r12
+	shl     $3, %r12                  # len(C) in bits (*128)
+	MOVQ_R64_XMM    %r12, %xmm1
+
+	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
+	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
+	pxor	%xmm15, %xmm8
+	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+	# final GHASH computation
+	movdqa SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM %xmm10, %xmm8
+
+	movdqu OrigIV(%arg2), %xmm0       # %xmm0 = Y0
+	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
+	pxor	%xmm8, %xmm0
+_return_T_\@:
+	mov	\AUTHTAG, %r10                     # %r10 = authTag
+	mov	\AUTHTAGLEN, %r11                    # %r11 = auth_tag_len
+	cmp	$16, %r11
+	je	_T_16_\@
+	cmp	$8, %r11
+	jl	_T_4_\@
+_T_8_\@:
+	MOVQ_R64_XMM	%xmm0, %rax
+	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
+	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_\@
+_T_4_\@:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_\@
+_T_123_\@:
+	movd	%xmm0, %eax
+	cmp	$2, %r11
+	jl	_T_1_\@
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_\@
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_\@:
+	mov	%al, (%r10)
+	jmp	_return_T_done_\@
+_T_16_\@:
+	movdqu	%xmm0, (%r10)
+_return_T_done_\@:
+.endm
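GCM_COMPLETE folds len(A)||len(C) (in bits) into the hash, XORs the result with E(K, Y0), and writes out only auth_tag_len bytes. A hedged C model of that finalization (ghash_mul is passed in as a stand-in for the GHASH_MUL macro; the byte-reflection done by SHUF_MASK is omitted):

#include <stdint.h>
#include <string.h>

typedef void (*ghash_mul_fn)(uint8_t state[16], const uint8_t hkey[16]);

static void gcm_complete_model(uint8_t *auth_tag, unsigned long auth_tag_len,
			       uint8_t state[16], const uint8_t hkey[16],
			       const uint8_t ek_y0[16],   /* E(K, Y0) */
			       uint64_t aad_len, uint64_t in_len,
			       ghash_mul_fn ghash_mul)
{
	uint8_t len_block[16];
	uint64_t aad_bits = aad_len * 8, ct_bits = in_len * 8;

	memcpy(len_block, &aad_bits, 8);        /* len(A)                    */
	memcpy(len_block + 8, &ct_bits, 8);     /* len(C)                    */
	for (int i = 0; i < 16; i++)
		state[i] ^= len_block[i];
	ghash_mul(state, hkey);                 /* final GHASH               */

	for (int i = 0; i < 16; i++)
		state[i] ^= ek_y0[i];           /* T = E(K, Y0) ^ GHASH      */
	memcpy(auth_tag, state, auth_tag_len < 16 ? auth_tag_len : 16);
}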
 
#ifdef __x86_64__
/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -264,232 +598,188 @@ _read_next_byte_lt8_\@:
_done_read_partial_block_\@:
.endm
 
-/*
-* if a = number of total plaintext bytes
-* b = floor(a/16)
-* num_initial_blocks = b mod 4
-* encrypt the initial num_initial_blocks blocks and apply ghash on
-* the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
-* are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
-*/
-
-
-.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
-        MOVADQ     SHUF_MASK(%rip), %xmm14
-	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r11           # %r11 = aadLen
-	pxor	   %xmm\i, %xmm\i
-	pxor       \XMM2, \XMM2
+# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
+# clobbers r10-11, xmm14
+.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
+	TMP6 TMP7
+	MOVADQ	   SHUF_MASK(%rip), %xmm14
+	mov	   \AAD, %r10		# %r10 = AAD
+	mov	   \AADLEN, %r11		# %r11 = aadLen
+	pxor	   \TMP7, \TMP7
+	pxor	   \TMP6, \TMP6
 
	cmp	   $16, %r11
-	jl	   _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
-	movdqu	   (%r10), %xmm\i
-	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   %xmm\i, \XMM2
-	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	jl	   _get_AAD_rest\@
+_get_AAD_blocks\@:
+	movdqu	   (%r10), \TMP7
+	PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+	pxor	   \TMP7, \TMP6
+	GHASH_MUL  \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
	add	   $16, %r10
	sub	   $16, %r11
-	cmp	   $16, %r11
-	jge	   _get_AAD_blocks\num_initial_blocks\operation
-
-	movdqu	   \XMM2, %xmm\i
-
-	/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	je	   _get_AAD_done\num_initial_blocks\operation
-
-	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   \XMM2, %xmm\i
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-
-_get_AAD_done\num_initial_blocks\operation:
-	xor	   %r11, %r11 # initialise the data pointer offset as zero
-	# start AES for num_initial_blocks blocks
-
-	mov	   %arg5, %rax                      # %rax = *Y0
-	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
-	PSHUFB_XMM   %xmm14, \XMM0
-
-.if (\i == 5) || (\i == 6) || (\i == 7)
-	MOVADQ		ONE(%RIP),\TMP1
-	MOVADQ		(%arg1),\TMP2
-.irpc index, \i_seq
-	paddd	   \TMP1, \XMM0                 # INCR Y0
-	movdqa	   \XMM0, %xmm\index
-	PSHUFB_XMM   %xmm14, %xmm\index      # perform a 16 byte swap
-	pxor	   \TMP2, %xmm\index
-.endr
-	lea	0x10(%arg1),%r10
-	mov	keysize,%eax
-	shr	$2,%eax				# 128->4, 192->6, 256->8
-	add	$5,%eax			      # 128->9, 192->11, 256->13
-
-aes_loop_initial_dec\num_initial_blocks:
-	MOVADQ	(%r10),\TMP1
-.irpc	index, \i_seq
-	AESENC	\TMP1, %xmm\index
-.endr
-	add	$16,%r10
-	sub	$1,%eax
-	jnz	aes_loop_initial_dec\num_initial_blocks
-
-	MOVADQ	(%r10), \TMP1
-.irpc index, \i_seq
-	AESENCLAST \TMP1, %xmm\index         # Last Round
-.endr
-.irpc index, \i_seq
-	movdqu	   (%arg3 , %r11, 1), \TMP1
-	pxor	   \TMP1, %xmm\index
-	movdqu	   %xmm\index, (%arg2 , %r11, 1)
-	# write back plaintext/ciphertext for num_initial_blocks
-	add	   $16, %r11
-
-	movdqa     \TMP1, %xmm\index
-	PSHUFB_XMM	   %xmm14, %xmm\index
-                # prepare plaintext/ciphertext for GHASH computation
-.endr
-.endif
-
-        # apply GHASH on num_initial_blocks blocks
-
-.if \i == 5
-        pxor       %xmm5, %xmm6
-	GHASH_MUL  %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm6, %xmm7
-	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 6
-        pxor       %xmm6, %xmm7
-	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 7
-        pxor       %xmm7, %xmm8
-	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.endif
-	cmp	   $64, %r13
-	jl	_initial_blocks_done\num_initial_blocks\operation
-	# no need for precomputed values
-/*
-*
-* Precomputations for HashKey parallel with encryption of first 4 blocks.
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
-*/
-	MOVADQ	   ONE(%rip), \TMP1
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM1
-	PSHUFB_XMM  %xmm14, \XMM1        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM2
-	PSHUFB_XMM  %xmm14, \XMM2        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM3
-	PSHUFB_XMM %xmm14, \XMM3        # perform a 16 byte swap
-
-	paddd	   \TMP1, \XMM0              # INCR Y0
-	MOVADQ	   \XMM0, \XMM4
-	PSHUFB_XMM %xmm14, \XMM4        # perform a 16 byte swap
-
-	MOVADQ	   0(%arg1),\TMP1
-	pxor	   \TMP1, \XMM1
-	pxor	   \TMP1, \XMM2
-	pxor	   \TMP1, \XMM3
-	pxor	   \TMP1, \XMM4
-	movdqa	   \TMP3, \TMP5
-	pshufd	   $78, \TMP3, \TMP1
-	pxor	   \TMP3, \TMP1
-	movdqa	   \TMP1, HashKey_k(%rsp)
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_2_k(%rsp)
-.irpc index, 1234 # do 4 rounds
-	movaps 0x10*\index(%arg1), \TMP1
-	AESENC	   \TMP1, \XMM1
-	AESENC	   \TMP1, \XMM2
-	AESENC	   \TMP1, \XMM3
-	AESENC	   \TMP1, \XMM4
-.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_3(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_3_k(%rsp)
-.irpc index, 56789 # do next 5 rounds
-	movaps 0x10*\index(%arg1), \TMP1
-	AESENC	   \TMP1, \XMM1
-	AESENC	   \TMP1, \XMM2
-	AESENC	   \TMP1, \XMM3
-	AESENC	   \TMP1, \XMM4
-.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_4(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_4_k(%rsp)
-	lea	   0xa0(%arg1),%r10
-	mov	   keysize,%eax
-	shr	   $2,%eax			# 128->4, 192->6, 256->8
-	sub	   $4,%eax			# 128->0, 192->2, 256->4
-	jz	   aes_loop_pre_dec_done\num_initial_blocks
-
-aes_loop_pre_dec\num_initial_blocks:
-	MOVADQ	   (%r10),\TMP2
-.irpc	index, 1234
-	AESENC	   \TMP2, %xmm\index
-.endr
-	add	   $16,%r10
-	sub	   $1,%eax
-	jnz	   aes_loop_pre_dec\num_initial_blocks
+	cmp	   $16, %r11
+	jge	   _get_AAD_blocks\@
 
-aes_loop_pre_dec_done\num_initial_blocks:
-	MOVADQ	   (%r10), \TMP2
-	AESENCLAST \TMP2, \XMM1
-	AESENCLAST \TMP2, \XMM2
-	AESENCLAST \TMP2, \XMM3
-	AESENCLAST \TMP2, \XMM4
-	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM1
-	movdqu	   \XMM1, 16*0(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM1
-	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM2
-	movdqu	   \XMM2, 16*1(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM2
-	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM3
-	movdqu	   \XMM3, 16*2(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM3
-	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
-	pxor	   \TMP1, \XMM4
-	movdqu	   \XMM4, 16*3(%arg2 , %r11 , 1)
-	movdqa     \TMP1, \XMM4
-	add	   $64, %r11
-	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
-	pxor	   \XMMDst, \XMM1
-# combine GHASHed value with the corresponding ciphertext
-	PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
+	movdqu	   \TMP6, \TMP7
+
+	/* read the last <16B of AAD */
+_get_AAD_rest\@:
+	cmp	   $0, %r11
+	je	   _get_AAD_done\@
 
-_initial_blocks_done\num_initial_blocks\operation:
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
+	PSHUFB_XMM   %xmm14, \TMP7 # byte-reflect the AAD data
+	pxor	   \TMP6, \TMP7
+	GHASH_MUL  \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
+	movdqu \TMP7, \TMP6
 
+_get_AAD_done\@:
+	movdqu \TMP6, AadHash(%arg2)
.endm
 
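CALC_AAD_HASH folds whole 16-byte AAD blocks into the GHASH state and zero-pads the tail via READ_PARTIAL_BLOCK. A hedged C model (ghash_mul is a caller-supplied stand-in for the GHASH_MUL macro; the assembly additionally byte-reflects every block):

#include <stdint.h>
#include <string.h>

typedef void (*ghash_mul_fn)(uint8_t state[16], const uint8_t hkey[16]);

static void calc_aad_hash_model(uint8_t state[16], const uint8_t hkey[16],
				const uint8_t *aad, size_t aad_len,
				ghash_mul_fn ghash_mul)
{
	uint8_t block[16];

	memset(state, 0, 16);
	while (aad_len >= 16) {
		for (int i = 0; i < 16; i++)
			state[i] ^= aad[i];
		ghash_mul(state, hkey);
		aad += 16;
		aad_len -= 16;
	}
	if (aad_len) {
		memset(block, 0, 16);
		memcpy(block, aad, aad_len);    /* READ_PARTIAL_BLOCK */
		for (int i = 0; i < 16; i++)
			state[i] ^= block[i];
		ghash_mul(state, hkey);
	}
	/* the macro finally stores the state in AadHash(%arg2) */
}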
+# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
+# between update calls.
+# Requires the input data to be at least 1 byte long due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_context_data
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+	AAD_HASH operation
+	mov 	PBlockLen(%arg2), %r13
+	cmp	$0, %r13
+	je	_partial_block_done_\@	# Leave Macro if no partial blocks
+	# Read in input data without over-reading
+	cmp	$16, \PLAIN_CYPH_LEN
+	jl	_fewer_than_16_bytes_\@
+	movups	(\PLAIN_CYPH_IN), %xmm1	# If more than 16 bytes, just fill xmm
+	jmp	_data_read_\@
+
+_fewer_than_16_bytes_\@:
+	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+	mov	\PLAIN_CYPH_LEN, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
+
+	mov PBlockLen(%arg2), %r13
+
+_data_read_\@:				# Finished reading in data
+
+	movdqu	PBlockEncKey(%arg2), %xmm9
+	movdqu	HashKey(%arg2), %xmm13
+
+	lea	SHIFT_MASK(%rip), %r12
+
+	# adjust the shuffle mask pointer to be able to shift r13 bytes
+	# (r13 is the number of bytes in plaintext mod 16)
+	add	%r13, %r12
+	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
+	PSHUFB_XMM %xmm2, %xmm9		# shift right r13 bytes
+
+.ifc \operation, dec
+	movdqa	%xmm1, %xmm3
+	pxor	%xmm1, %xmm9		# Cyphertext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+	sub	$16, %r10
+	# Determine if partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_1_\@
+	sub	%r10, %r12
+_no_extra_mask_1_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9
+
+	pand	%xmm1, %xmm3
+	movdqa	SHUF_MASK(%rip), %xmm10
+	PSHUFB_XMM	%xmm10, %xmm3
+	PSHUFB_XMM	%xmm2, %xmm3
+	pxor	%xmm3, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_1_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax,%rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_dec_done_\@
+_partial_incomplete_1_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_dec_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+.else
+	pxor	%xmm1, %xmm9			# Plaintext XOR E(K, Yn)
+
+	mov	\PLAIN_CYPH_LEN, %r10
+	add	%r13, %r10
+	# Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+	sub	$16, %r10
+	# Determine if partial block is not being filled and
+	# shift mask accordingly
+	jge	_no_extra_mask_2_\@
+	sub	%r10, %r12
+_no_extra_mask_2_\@:
+
+	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	# get the appropriate mask to mask out bottom r13 bytes of xmm9
+	pand	%xmm1, %xmm9
+
+	movdqa	SHUF_MASK(%rip), %xmm1
+	PSHUFB_XMM %xmm1, %xmm9
+	PSHUFB_XMM %xmm2, %xmm9
+	pxor	%xmm9, \AAD_HASH
+
+	cmp	$0, %r10
+	jl	_partial_incomplete_2_\@
+
+	# GHASH computation for the last <16 Byte block
+	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+	xor	%rax,%rax
+
+	mov	%rax, PBlockLen(%arg2)
+	jmp	_encode_done_\@
+_partial_incomplete_2_\@:
+	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_encode_done_\@:
+	movdqu	\AAD_HASH, AadHash(%arg2)
+
+	movdqa	SHUF_MASK(%rip), %xmm10
+	# shuffle xmm9 back to output as ciphertext
+	PSHUFB_XMM	%xmm10, %xmm9
+	PSHUFB_XMM	%xmm2, %xmm9
+.endif
+	# output encrypted Bytes
+	cmp	$0, %r10
+	jl	_partial_fill_\@
+	mov	%r13, %r12
+	mov	$16, %r13
+	# Set r13 to be the number of bytes to write out
+	sub	%r12, %r13
+	jmp	_count_set_\@
+_partial_fill_\@:
+	mov	\PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+	movdqa	%xmm9, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	cmp	$8, %r13
+	jle	_less_than_8_bytes_left_\@
+
+	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$8, \DATA_OFFSET
+	psrldq	$8, %xmm0
+	MOVQ_R64_XMM	%xmm0, %rax
+	sub	$8, %r13
+_less_than_8_bytes_left_\@:
+	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+	add	$1, \DATA_OFFSET
+	shr	$8, %rax
+	sub	$1, %r13
+	jne	_less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
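PARTIAL_BLOCK lets a chunk boundary fall anywhere: the unused bytes of the last counter-block keystream are kept in PBlockEncKey/PBlockLen, and the next update call consumes them before touching fresh blocks. A hedged model of that bookkeeping (names are illustrative):

#include <stdint.h>
#include <stddef.h>

struct partial_state {
	uint8_t enc_key[16];   /* E(K, Yn) of the block in progress    */
	size_t  used;          /* bytes of it already consumed (0..15) */
};

static size_t partial_block_model(struct partial_state *pb,
				  uint8_t *out, const uint8_t *in, size_t len)
{
	size_t n = 0;

	if (!pb->used)
		return 0;                                /* _partial_block_done_ */
	while (pb->used < 16 && n < len) {
		out[n] = in[n] ^ pb->enc_key[pb->used];  /* reuse keystream */
		pb->used++;
		n++;
	}
	if (pb->used == 16)
		pb->used = 0;   /* block complete; asm also folds it into GHASH */
	return n;               /* caller continues with the remaining bytes */
}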
 
/*
* if a = number of total plaintext bytes
@@ -499,49 +789,19 @@ _initial_blocks_done\num_initial_blocks\operation:
* the ciphertext
* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
* are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+* arg1, %arg2, %arg3 are used as a pointer only, not modified
 */
 
-.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
-        MOVADQ     SHUF_MASK(%rip), %xmm14
-	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r11           # %r11 = aadLen
-	pxor	   %xmm\i, %xmm\i
-	pxor	   \XMM2, \XMM2
-
-	cmp	   $16, %r11
-	jl	   _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
-	movdqu	   (%r10), %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   %xmm\i, \XMM2
-	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-	add	   $16, %r10
-	sub	   $16, %r11
-	cmp	   $16, %r11
-	jge	   _get_AAD_blocks\num_initial_blocks\operation
-
-	movdqu	   \XMM2, %xmm\i
-
-	/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	je	   _get_AAD_done\num_initial_blocks\operation
+.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+	MOVADQ		SHUF_MASK(%rip), %xmm14
 
-	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
-	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
-	pxor	   \XMM2, %xmm\i
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	movdqu AadHash(%arg2), %xmm\i		    # XMM0 = Y0
 
-_get_AAD_done\num_initial_blocks\operation:
-	xor	   %r11, %r11 # initialise the data pointer offset as zero
	# start AES for num_initial_blocks blocks
 
-	mov	   %arg5, %rax                      # %rax = *Y0
-	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
-	PSHUFB_XMM   %xmm14, \XMM0
+	movdqu CurCount(%arg2), \XMM0                # XMM0 = Y0
 
.if (\i == 5) || (\i == 6) || (\i == 7)
 
@@ -549,7 +809,11 @@ _get_AAD_done\num_initial_blocks\operation:
	MOVADQ		0(%arg1),\TMP2
.irpc index, \i_seq
	paddd		\TMP1, \XMM0                 # INCR Y0
+.ifc \operation, dec
+        movdqa     \XMM0, %xmm\index
+.else
	MOVADQ		\XMM0, %xmm\index
+.endif
	PSHUFB_XMM	%xmm14, %xmm\index      # perform a 16 byte swap
	pxor		\TMP2, %xmm\index
.endr
@@ -558,25 +822,29 @@ _get_AAD_done\num_initial_blocks\operation:
	shr	$2,%eax				# 128->4, 192->6, 256->8
	add	$5,%eax			      # 128->9, 192->11, 256->13
 
-aes_loop_initial_enc\num_initial_blocks:
+aes_loop_initial_\@:
	MOVADQ	(%r10),\TMP1
.irpc	index, \i_seq
	AESENC	\TMP1, %xmm\index
.endr
	add	$16,%r10
	sub	$1,%eax
-	jnz	aes_loop_initial_enc\num_initial_blocks
+	jnz	aes_loop_initial_\@
 
	MOVADQ	(%r10), \TMP1
.irpc index, \i_seq
	AESENCLAST \TMP1, %xmm\index         # Last Round
.endr
.irpc index, \i_seq
-	movdqu	   (%arg3 , %r11, 1), \TMP1
+	movdqu	   (%arg4 , %r11, 1), \TMP1
	pxor	   \TMP1, %xmm\index
-	movdqu	   %xmm\index, (%arg2 , %r11, 1)
+	movdqu	   %xmm\index, (%arg3 , %r11, 1)
	# write back plaintext/ciphertext for num_initial_blocks
	add	   $16, %r11
+
+.ifc \operation, dec
+	movdqa     \TMP1, %xmm\index
+.endif
	PSHUFB_XMM	   %xmm14, %xmm\index

		# prepare plaintext/ciphertext for GHASH computation
@@ -602,7 +870,7 @@ aes_loop_initial_enc\num_initial_blocks:
	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.endif
	cmp	   $64, %r13
-	jl	_initial_blocks_done\num_initial_blocks\operation
+	jl	_initial_blocks_done\@
	# no need for precomputed values
/*
*
@@ -631,17 +899,6 @@ aes_loop_initial_enc\num_initial_blocks:
	pxor	   \TMP1, \XMM2
	pxor	   \TMP1, \XMM3
	pxor	   \TMP1, \XMM4
-	movdqa	   \TMP3, \TMP5
-	pshufd	   $78, \TMP3, \TMP1
-	pxor	   \TMP3, \TMP1
-	movdqa	   \TMP1, HashKey_k(%rsp)
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_2_k(%rsp)
.irpc index, 1234 # do 4 rounds
	movaps 0x10*\index(%arg1), \TMP1
	AESENC	   \TMP1, \XMM1
@@ -649,12 +906,6 @@ aes_loop_initial_enc\num_initial_blocks:
	AESENC	   \TMP1, \XMM3
	AESENC	   \TMP1, \XMM4
.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_3(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_3_k(%rsp)
.irpc index, 56789 # do next 5 rounds
	movaps 0x10*\index(%arg1), \TMP1
	AESENC	   \TMP1, \XMM1
@@ -662,45 +913,56 @@ aes_loop_initial_enc\num_initial_blocks:
	AESENC	   \TMP1, \XMM3
	AESENC	   \TMP1, \XMM4
.endr
-	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
-	movdqa	   \TMP5, HashKey_4(%rsp)
-	pshufd	   $78, \TMP5, \TMP1
-	pxor	   \TMP5, \TMP1
-	movdqa	   \TMP1, HashKey_4_k(%rsp)
	lea	   0xa0(%arg1),%r10
	mov	   keysize,%eax
	shr	   $2,%eax			# 128->4, 192->6, 256->8
	sub	   $4,%eax			# 128->0, 192->2, 256->4
-	jz	   aes_loop_pre_enc_done\num_initial_blocks
+	jz	   aes_loop_pre_done\@
 
-aes_loop_pre_enc\num_initial_blocks:
+aes_loop_pre_\@:
	MOVADQ	   (%r10),\TMP2
.irpc	index, 1234
	AESENC	   \TMP2, %xmm\index
.endr
	add	   $16,%r10
	sub	   $1,%eax
-	jnz	   aes_loop_pre_enc\num_initial_blocks
+	jnz	   aes_loop_pre_\@
 
-aes_loop_pre_enc_done\num_initial_blocks:
+aes_loop_pre_done\@:
	MOVADQ	   (%r10), \TMP2
	AESENCLAST \TMP2, \XMM1
	AESENCLAST \TMP2, \XMM2
	AESENCLAST \TMP2, \XMM3
	AESENCLAST \TMP2, \XMM4
-	movdqu	   16*0(%arg3 , %r11 , 1), \TMP1
+	movdqu	   16*0(%arg4 , %r11 , 1), \TMP1
	pxor	   \TMP1, \XMM1
-	movdqu	   16*1(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM1
+.endif
+	movdqu	   16*1(%arg4 , %r11 , 1), \TMP1
	pxor	   \TMP1, \XMM2
-	movdqu	   16*2(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM2
+.endif
+	movdqu	   16*2(%arg4 , %r11 , 1), \TMP1
	pxor	   \TMP1, \XMM3
-	movdqu	   16*3(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM3
+.endif
+	movdqu	   16*3(%arg4 , %r11 , 1), \TMP1
	pxor	   \TMP1, \XMM4
-	movdqu     \XMM1, 16*0(%arg2 , %r11 , 1)
-	movdqu     \XMM2, 16*1(%arg2 , %r11 , 1)
-	movdqu     \XMM3, 16*2(%arg2 , %r11 , 1)
-	movdqu     \XMM4, 16*3(%arg2 , %r11 , 1)
+.ifc \operation, dec
+	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
+	movdqa     \TMP1, \XMM4
+.else
+	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
+	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
+	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
+	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
+.endif
 
	add	   $64, %r11
	PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
@@ -710,14 +972,14 @@ aes_loop_pre_enc_done\num_initial_blocks:
	PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
	PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
 
-_initial_blocks_done\num_initial_blocks\operation:
+_initial_blocks_done\@:
 
.endm

/*
* encrypt 4 blocks at a time
* ghash the 4 previously encrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -735,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pshufd	  $78, \XMM5, \TMP6
	pxor	  \XMM5, \TMP6
	paddd     ONE(%rip), \XMM0		# INCR CNT
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
	movdqa    \XMM0, \XMM1
	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -754,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pxor	  (%arg1), \XMM2
	pxor	  (%arg1), \XMM3
	pxor	  (%arg1), \XMM4
-	movdqa	  HashKey_4_k(%rsp), \TMP5
+	movdqa	  HashKey_4_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
	movaps 0x10(%arg1), \TMP1
	AESENC	  \TMP1, \XMM1              # Round 1
@@ -769,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM6, \TMP1
	pshufd	  $78, \XMM6, \TMP2
	pxor	  \XMM6, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
	movaps 0x30(%arg1), \TMP3
	AESENC    \TMP3, \XMM1              # Round 3
@@ -782,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_3_k(%rsp), \TMP5
+	movdqa	  HashKey_3_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x50(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1              # Round 5
@@ -796,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM7, \TMP1
	pshufd	  $78, \XMM7, \TMP2
	pxor	  \XMM7, \TMP2
-	movdqa	  HashKey_2(%rsp ), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 
        # Multiply TMP5 * HashKey using karatsuba
 
@@ -812,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_2_k(%rsp), \TMP5
+	movdqa	  HashKey_2_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x80(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1             # Round 8
@@ -830,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM8, \TMP1
	pshufd	  $78, \XMM8, \TMP2
	pxor	  \XMM8, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
	movaps 0x90(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1            # Round 9
@@ -842,37 +1104,37 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	mov	  keysize,%eax
	shr	  $2,%eax			# 128->4, 192->6, 256->8
	sub	  $4,%eax			# 128->0, 192->2, 256->4
-	jz	  aes_loop_par_enc_done
+	jz	  aes_loop_par_enc_done\@
 
-aes_loop_par_enc:
+aes_loop_par_enc\@:
	MOVADQ	  (%r10),\TMP3
.irpc	index, 1234
	AESENC	  \TMP3, %xmm\index
.endr
	add	  $16,%r10
	sub	  $1,%eax
-	jnz	  aes_loop_par_enc
+	jnz	  aes_loop_par_enc\@
 
-aes_loop_par_enc_done:
+aes_loop_par_enc_done\@:
	MOVADQ	  (%r10), \TMP3
	AESENCLAST \TMP3, \XMM1           # Round 10
	AESENCLAST \TMP3, \XMM2
	AESENCLAST \TMP3, \XMM3
	AESENCLAST \TMP3, \XMM4
-	movdqa    HashKey_k(%rsp), \TMP5
+	movdqa    HashKey_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
-	movdqu	  (%arg3,%r11,1), \TMP3
+	movdqu	  (%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
-	movdqu	  16(%arg3,%r11,1), \TMP3
+	movdqu	  16(%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
-	movdqu	  32(%arg3,%r11,1), \TMP3
+	movdqu	  32(%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
-	movdqu	  48(%arg3,%r11,1), \TMP3
+	movdqu	  48(%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
-        movdqu    \XMM1, (%arg2,%r11,1)        # Write to the ciphertext buffer
-        movdqu    \XMM2, 16(%arg2,%r11,1)      # Write to the ciphertext buffer
-        movdqu    \XMM3, 32(%arg2,%r11,1)      # Write to the ciphertext buffer
-        movdqu    \XMM4, 48(%arg2,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM1, (%arg3,%r11,1)        # Write to the ciphertext buffer
+        movdqu    \XMM2, 16(%arg3,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM3, 32(%arg3,%r11,1)      # Write to the ciphertext buffer
+        movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to the ciphertext buffer
	PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
	PSHUFB_XMM %xmm15, \XMM2	# perform a 16 byte swap
	PSHUFB_XMM %xmm15, \XMM3	# perform a 16 byte swap
@@ -925,7 +1187,7 @@ aes_loop_par_enc_done:
/*
* decrypt 4 blocks at a time
* ghash the 4 previously decrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -943,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pshufd	  $78, \XMM5, \TMP6
	pxor	  \XMM5, \TMP6
	paddd     ONE(%rip), \XMM0		# INCR CNT
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP4           # TMP4 = a1*b1
	movdqa    \XMM0, \XMM1
	paddd     ONE(%rip), \XMM0		# INCR CNT
@@ -962,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	pxor	  (%arg1), \XMM2
	pxor	  (%arg1), \XMM3
	pxor	  (%arg1), \XMM4
-	movdqa	  HashKey_4_k(%rsp), \TMP5
+	movdqa	  HashKey_4_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP6           # TMP6 = (a1+a0)*(b1+b0)
	movaps 0x10(%arg1), \TMP1
	AESENC	  \TMP1, \XMM1              # Round 1
@@ -977,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM6, \TMP1
	pshufd	  $78, \XMM6, \TMP2
	pxor	  \XMM6, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1           # TMP1 = a1 * b1
	movaps 0x30(%arg1), \TMP3
	AESENC    \TMP3, \XMM1              # Round 3
@@ -990,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_3_k(%rsp), \TMP5
+	movdqa	  HashKey_3_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x50(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1              # Round 5
@@ -1004,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM7, \TMP1
	pshufd	  $78, \XMM7, \TMP2
	pxor	  \XMM7, \TMP2
-	movdqa	  HashKey_2(%rsp ), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
 
        # Multiply TMP5 * HashKey using karatsuba
 
@@ -1020,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	AESENC	  \TMP3, \XMM2
	AESENC	  \TMP3, \XMM3
	AESENC	  \TMP3, \XMM4
-	movdqa	  HashKey_2_k(%rsp), \TMP5
+	movdqa	  HashKey_2_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2           # TMP2 = (a1+a0)*(b1+b0)
	movaps 0x80(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1             # Round 8
@@ -1038,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	movdqa	  \XMM8, \TMP1
	pshufd	  $78, \XMM8, \TMP2
	pxor	  \XMM8, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1          # TMP1 = a1*b1
	movaps 0x90(%arg1), \TMP3
	AESENC	  \TMP3, \XMM1            # Round 9
@@ -1050,40 +1312,40 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
	mov	  keysize,%eax
	shr	  $2,%eax		        # 128->4, 192->6, 256->8
	sub	  $4,%eax			# 128->0, 192->2, 256->4
-	jz	  aes_loop_par_dec_done
+	jz	  aes_loop_par_dec_done\@
 
-aes_loop_par_dec:
+aes_loop_par_dec\@:
	MOVADQ	  (%r10),\TMP3
.irpc	index, 1234
	AESENC	  \TMP3, %xmm\index
.endr
	add	  $16,%r10
	sub	  $1,%eax
-	jnz	  aes_loop_par_dec
+	jnz	  aes_loop_par_dec\@
 
-aes_loop_par_dec_done:
+aes_loop_par_dec_done\@:
	MOVADQ	  (%r10), \TMP3
	AESENCLAST \TMP3, \XMM1           # last round
	AESENCLAST \TMP3, \XMM2
	AESENCLAST \TMP3, \XMM3
	AESENCLAST \TMP3, \XMM4
-	movdqa    HashKey_k(%rsp), \TMP5
+	movdqa    HashKey_k(%arg2), \TMP5
	PCLMULQDQ 0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
-	movdqu	  (%arg3,%r11,1), \TMP3
+	movdqu	  (%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM1, (%arg2,%r11,1)        # Write to plaintext buffer
+	movdqu	  \XMM1, (%arg3,%r11,1)        # Write to plaintext buffer
	movdqa    \TMP3, \XMM1
-	movdqu	  16(%arg3,%r11,1), \TMP3
+	movdqu	  16(%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM2, 16(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM2, 16(%arg3,%r11,1)      # Write to plaintext buffer
	movdqa    \TMP3, \XMM2
-	movdqu	  32(%arg3,%r11,1), \TMP3
+	movdqu	  32(%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM3, 32(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM3, 32(%arg3,%r11,1)      # Write to plaintext buffer
	movdqa    \TMP3, \XMM3
-	movdqu	  48(%arg3,%r11,1), \TMP3
+	movdqu	  48(%arg4,%r11,1), \TMP3
	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
-	movdqu	  \XMM4, 48(%arg2,%r11,1)      # Write to plaintext buffer
+	movdqu	  \XMM4, 48(%arg3,%r11,1)      # Write to plaintext buffer
	movdqa    \TMP3, \XMM4
	PSHUFB_XMM %xmm15, \XMM1        # perform a 16 byte swap
	PSHUFB_XMM %xmm15, \XMM2	# perform a 16 byte swap
@@ -1143,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM1, \TMP6
	pshufd	  $78, \XMM1, \TMP2
	pxor	  \XMM1, \TMP2
-	movdqa	  HashKey_4(%rsp), \TMP5
+	movdqa	  HashKey_4(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP6       # TMP6 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM1       # XMM1 = a0*b0
-	movdqa	  HashKey_4_k(%rsp), \TMP4
+	movdqa	  HashKey_4_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	movdqa	  \XMM1, \XMMDst
	movdqa	  \TMP2, \XMM1              # result in TMP6, XMMDst, XMM1
@@ -1156,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM2, \TMP1
	pshufd	  $78, \XMM2, \TMP2
	pxor	  \XMM2, \TMP2
-	movdqa	  HashKey_3(%rsp), \TMP5
+	movdqa	  HashKey_3(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM2       # XMM2 = a0*b0
-	movdqa	  HashKey_3_k(%rsp), \TMP4
+	movdqa	  HashKey_3_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM2, \XMMDst
@@ -1171,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM3, \TMP1
	pshufd	  $78, \XMM3, \TMP2
	pxor	  \XMM3, \TMP2
-	movdqa	  HashKey_2(%rsp), \TMP5
+	movdqa	  HashKey_2(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1       # TMP1 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM3       # XMM3 = a0*b0
-	movdqa	  HashKey_2_k(%rsp), \TMP4
+	movdqa	  HashKey_2_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM3, \XMMDst
@@ -1184,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
	movdqa	  \XMM4, \TMP1
	pshufd	  $78, \XMM4, \TMP2
	pxor	  \XMM4, \TMP2
-	movdqa	  HashKey(%rsp), \TMP5
+	movdqa	  HashKey(%arg2), \TMP5
	PCLMULQDQ 0x11, \TMP5, \TMP1	    # TMP1 = a1*b1
	PCLMULQDQ 0x00, \TMP5, \XMM4       # XMM4 = a0*b0
-	movdqa	  HashKey_k(%rsp), \TMP4
+	movdqa	  HashKey_k(%arg2), \TMP4
	PCLMULQDQ 0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
	pxor	  \TMP1, \TMP6
	pxor	  \XMM4, \XMMDst
@@ -1256,6 +1518,8 @@ _esb_loop_\@:
.endm
/*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx,    // AES Key schedule. Starts on a 16 byte boundary.
+*                   struct gcm_context_data *data
+*                                      // Context data
*                   u8 *out,           // Plaintext output. Encrypt in-place is allowed.
*                   const u8 *in,      // Ciphertext input
*                   u64 plaintext_len, // Length of data in bytes for decryption.
@@ -1333,195 +1597,20 @@ _esb_loop_\@:
*
*****************************************************************************/
ENTRY(aesni_gcm_dec)
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	%rsp, %r14
-/*
-* states of %xmm registers %xmm6:%xmm15 not saved
-* all %xmm registers are clobbered
-*/
-	sub	$VARIABLE_OFFSET, %rsp
-	and	$~63, %rsp                        # align rsp to 64 bytes
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13			  # %xmm13 = HashKey
-        movdqa  SHUF_MASK(%rip), %xmm2
-	PSHUFB_XMM %xmm2, %xmm13
-
-
-# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
-
-	movdqa	%xmm13, %xmm2
-	psllq	$1, %xmm13
-	psrlq	$63, %xmm2
-	movdqa	%xmm2, %xmm1
-	pslldq	$8, %xmm2
-	psrldq	$8, %xmm1
-	por	%xmm2, %xmm13
-
-        # Reduction
-
-	pshufd	$0x24, %xmm1, %xmm2
-	pcmpeqd TWOONE(%rip), %xmm2
-	pand	POLY(%rip), %xmm2
-	pxor	%xmm2, %xmm13     # %xmm13 holds the HashKey<<1 (mod poly)
-
-
-        # Decrypt first few blocks
-
-	movdqa %xmm13, HashKey(%rsp)           # store HashKey<<1 (mod poly)
-	mov %arg4, %r13    # save the number of bytes of plaintext/ciphertext
-	and $-16, %r13                      # %r13 = %r13 - (%r13 mod 16)
-	mov %r13, %r12
-	and $(3<<4), %r12
-	jz _initial_num_blocks_is_0_decrypt
-	cmp $(2<<4), %r12
-	jb _initial_num_blocks_is_1_decrypt
-	je _initial_num_blocks_is_2_decrypt
-_initial_num_blocks_is_3_decrypt:
-	INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
-	sub	$48, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_2_decrypt:
-	INITIAL_BLOCKS_DEC	2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
-	sub	$32, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_1_decrypt:
-	INITIAL_BLOCKS_DEC	1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
-	sub	$16, %r13
-	jmp	_initial_blocks_decrypted
-_initial_num_blocks_is_0_decrypt:
-	INITIAL_BLOCKS_DEC	0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
-_initial_blocks_decrypted:
-	cmp	$0, %r13
-	je	_zero_cipher_left_decrypt
-	sub	$64, %r13
-	je	_four_cipher_left_decrypt
-_decrypt_by_4:
-	GHASH_4_ENCRYPT_4_PARALLEL_DEC	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
-	add	$64, %r11
-	sub	$64, %r13
-	jne	_decrypt_by_4
-_four_cipher_left_decrypt:
-	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_decrypt:
-	mov	%arg4, %r13
-	and	$15, %r13				# %r13 = arg4 (mod 16)
-	je	_multiple_of_16_bytes_decrypt
-
-        # Handle the last <16 byte block separately
+	FUNC_SAVE
 
-	paddd ONE(%rip), %xmm0         # increment CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
-
-	ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1    # E(K, Yn)
-
-	lea (%arg3,%r11,1), %r10
-	mov %r13, %r12
-	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-	lea ALL_F+16(%rip), %r12
-	sub %r13, %r12
-	movdqa  %xmm1, %xmm2
-	pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-	movdqu (%r12), %xmm1
-	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
-	pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
-	pand    %xmm1, %xmm2
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10 ,%xmm2
-
-	pxor %xmm2, %xmm8
-	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-
-        # output %r13 bytes
-	MOVQ_R64_XMM	%xmm0, %rax
-	cmp	$8, %r13
-	jle	_less_than_8_bytes_left_decrypt
-	mov	%rax, (%arg2 , %r11, 1)
-	add	$8, %r11
-	psrldq	$8, %xmm0
-	MOVQ_R64_XMM	%xmm0, %rax
-	sub	$8, %r13
-_less_than_8_bytes_left_decrypt:
-	mov	%al,  (%arg2, %r11, 1)
-	add	$1, %r11
-	shr	$8, %rax
-	sub	$1, %r13
-	jne	_less_than_8_bytes_left_decrypt
-_multiple_of_16_bytes_decrypt:
-	mov	arg8, %r12		  # %r13 = aadLen (number of bytes)
-	shl	$3, %r12		  # convert into number of bits
-	movd	%r12d, %xmm15		  # len(A) in %xmm15
-	shl	$3, %arg4		  # len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
-	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
-	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
-	pxor	%xmm15, %xmm8
-	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	         # final GHASH computation
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm8
-
-	mov	%arg5, %rax		  # %rax = *Y0
-	movdqu	(%rax), %xmm0		  # %xmm0 = Y0
-	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
-	pxor	%xmm8, %xmm0
-_return_T_decrypt:
-	mov	arg9, %r10                # %r10 = authTag
-	mov	arg10, %r11               # %r11 = auth_tag_len
-	cmp	$16, %r11
-	je	_T_16_decrypt
-	cmp	$8, %r11
-	jl	_T_4_decrypt
-_T_8_decrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
-	add	$8, %r10
-	sub	$8, %r11
-	psrldq	$8, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_decrypt
-_T_4_decrypt:
-	movd	%xmm0, %eax
-	mov	%eax, (%r10)
-	add	$4, %r10
-	sub	$4, %r11
-	psrldq	$4, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_decrypt
-_T_123_decrypt:
-	movd	%xmm0, %eax
-	cmp	$2, %r11
-	jl	_T_1_decrypt
-	mov	%ax, (%r10)
-	cmp	$2, %r11
-	je	_return_T_done_decrypt
-	add	$2, %r10
-	sar	$16, %eax
-_T_1_decrypt:
-	mov	%al, (%r10)
-	jmp	_return_T_done_decrypt
-_T_16_decrypt:
-	movdqu	%xmm0, (%r10)
-_return_T_done_decrypt:
-	mov	%r14, %rsp
-	pop	%r14
-	pop	%r13
-	pop	%r12
+	GCM_INIT %arg6, arg7, arg8, arg9
+	GCM_ENC_DEC dec
+	GCM_COMPLETE arg10, arg11
+	FUNC_RESTORE
	ret
ENDPROC(aesni_gcm_dec)


/*****************************************************************************
* void aesni_gcm_enc(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data
+*                                        // Context data
*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
*                    const u8 *in,       // Plaintext input
*                    u64 plaintext_len,  // Length of data in bytes for encryption.
@@ -1596,195 +1685,78 @@ ENDPROC(aesni_gcm_dec)
* poly = x^128 + x^127 + x^126 + x^121 + 1
***************************************************************************/
ENTRY(aesni_gcm_enc)
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	%rsp, %r14
-#
-# states of %xmm registers %xmm6:%xmm15 not saved
-# all %xmm registers are clobbered
-#
-	sub	$VARIABLE_OFFSET, %rsp
-	and	$~63, %rsp
-	mov	%arg6, %r12
-	movdqu	(%r12), %xmm13
-        movdqa  SHUF_MASK(%rip), %xmm2
-	PSHUFB_XMM %xmm2, %xmm13
-
-
-# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
-
-	movdqa	%xmm13, %xmm2
-	psllq	$1, %xmm13
-	psrlq	$63, %xmm2
-	movdqa	%xmm2, %xmm1
-	pslldq	$8, %xmm2
-	psrldq	$8, %xmm1
-	por	%xmm2, %xmm13
-
-        # reduce HashKey<<1
-
-	pshufd	$0x24, %xmm1, %xmm2
-	pcmpeqd TWOONE(%rip), %xmm2
-	pand	POLY(%rip), %xmm2
-	pxor	%xmm2, %xmm13
-	movdqa	%xmm13, HashKey(%rsp)
-	mov	%arg4, %r13            # %xmm13 holds HashKey<<1 (mod poly)
-	and	$-16, %r13
-	mov	%r13, %r12
-
-        # Encrypt first few blocks
-
-	and	$(3<<4), %r12
-	jz	_initial_num_blocks_is_0_encrypt
-	cmp	$(2<<4), %r12
-	jb	_initial_num_blocks_is_1_encrypt
-	je	_initial_num_blocks_is_2_encrypt
-_initial_num_blocks_is_3_encrypt:
-	INITIAL_BLOCKS_ENC	3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
-	sub	$48, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_2_encrypt:
-	INITIAL_BLOCKS_ENC	2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
-	sub	$32, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_1_encrypt:
-	INITIAL_BLOCKS_ENC	1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
-	sub	$16, %r13
-	jmp	_initial_blocks_encrypted
-_initial_num_blocks_is_0_encrypt:
-	INITIAL_BLOCKS_ENC	0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
-_initial_blocks_encrypted:
-
-        # Main loop - Encrypt remaining blocks
-
-	cmp	$0, %r13
-	je	_zero_cipher_left_encrypt
-	sub	$64, %r13
-	je	_four_cipher_left_encrypt
-_encrypt_by_4_encrypt:
-	GHASH_4_ENCRYPT_4_PARALLEL_ENC	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
-	add	$64, %r11
-	sub	$64, %r13
-	jne	_encrypt_by_4_encrypt
-_four_cipher_left_encrypt:
-	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_encrypt:
-	mov	%arg4, %r13
-	and	$15, %r13			# %r13 = arg4 (mod 16)
-	je	_multiple_of_16_bytes_encrypt
+	FUNC_SAVE
 
 
-         # Handle the last <16 Byte block separately
-	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
-
-	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
+	GCM_INIT %arg6, arg7, arg8, arg9
+	GCM_ENC_DEC enc
 
 
-	lea (%arg3,%r11,1), %r10
-	mov %r13, %r12
-	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-	lea ALL_F+16(%rip), %r12
-	sub %r13, %r12
-	pxor	%xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-	movdqu	(%r12), %xmm1
-	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
-	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10,%xmm0
+	GCM_COMPLETE arg10, arg11
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_enc)
 
 
-	pxor	%xmm0, %xmm8
-	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# GHASH computation for the last <16 byte block
-	movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm0
+/*****************************************************************************
+* void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                     struct gcm_context_data *data,
+*                                         // context data
+*                     u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
+*                                         // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+*                                         // concatenated with 0x00000001. 16-byte aligned pointer.
+*                     u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
+*                     const u8 *aad,      // Additional Authentication Data (AAD)
+*                     u64 aad_len)        // Length of AAD in bytes.
+*/
+ENTRY(aesni_gcm_init)
+	FUNC_SAVE
+	GCM_INIT %arg3, %arg4,%arg5, %arg6
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_init)
 
 
-	# shuffle xmm0 back to output as ciphertext
+/*****************************************************************************
+* void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
+*                    const u8 *in,       // Plaintext input
+*                    u64 plaintext_len,  // Length of data in bytes for encryption.
+*/
+ENTRY(aesni_gcm_enc_update)
+	FUNC_SAVE
+	GCM_ENC_DEC enc
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_enc_update)
 
 
-        # Output %r13 bytes
-	MOVQ_R64_XMM %xmm0, %rax
-	cmp $8, %r13
-	jle _less_than_8_bytes_left_encrypt
-	mov %rax, (%arg2 , %r11, 1)
-	add $8, %r11
-	psrldq $8, %xmm0
-	MOVQ_R64_XMM %xmm0, %rax
-	sub $8, %r13
-_less_than_8_bytes_left_encrypt:
-	mov %al,  (%arg2, %r11, 1)
-	add $1, %r11
-	shr $8, %rax
-	sub $1, %r13
-	jne _less_than_8_bytes_left_encrypt
-_multiple_of_16_bytes_encrypt:
-	mov	arg8, %r12    # %r12 = addLen (number of bytes)
-	shl	$3, %r12
-	movd	%r12d, %xmm15       # len(A) in %xmm15
-	shl	$3, %arg4               # len(C) in bits (*128)
-	MOVQ_R64_XMM	%arg4, %xmm1
-	pslldq	$8, %xmm15          # %xmm15 = len(A)||0x0000000000000000
-	pxor	%xmm1, %xmm15       # %xmm15 = len(A)||len(C)
-	pxor	%xmm15, %xmm8
-	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# final GHASH computation
-        movdqa SHUF_MASK(%rip), %xmm10
-	PSHUFB_XMM %xmm10, %xmm8         # perform a 16 byte swap
+/*****************************************************************************
+* void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *out,            // Plaintext output. Decrypt in-place is allowed.
+*                    const u8 *in,       // Ciphertext input
+*                    u64 ciphertext_len, // Length of data in bytes for decryption.
+*/
+ENTRY(aesni_gcm_dec_update)
+	FUNC_SAVE
+	GCM_ENC_DEC dec
+	FUNC_RESTORE
+	ret
+ENDPROC(aesni_gcm_dec_update)
 
 
-	mov	%arg5, %rax		       # %rax  = *Y0
-	movdqu	(%rax), %xmm0		       # %xmm0 = Y0
-	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm15         # Encrypt(K, Y0)
-	pxor	%xmm8, %xmm0
-_return_T_encrypt:
-	mov	arg9, %r10                     # %r10 = authTag
-	mov	arg10, %r11                    # %r11 = auth_tag_len
-	cmp	$16, %r11
-	je	_T_16_encrypt
-	cmp	$8, %r11
-	jl	_T_4_encrypt
-_T_8_encrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
-	add	$8, %r10
-	sub	$8, %r11
-	psrldq	$8, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_encrypt
-_T_4_encrypt:
-	movd	%xmm0, %eax
-	mov	%eax, (%r10)
-	add	$4, %r10
-	sub	$4, %r11
-	psrldq	$4, %xmm0
-	cmp	$0, %r11
-	je	_return_T_done_encrypt
-_T_123_encrypt:
-	movd	%xmm0, %eax
-	cmp	$2, %r11
-	jl	_T_1_encrypt
-	mov	%ax, (%r10)
-	cmp	$2, %r11
-	je	_return_T_done_encrypt
-	add	$2, %r10
-	sar	$16, %eax
-_T_1_encrypt:
-	mov	%al, (%r10)
-	jmp	_return_T_done_encrypt
-_T_16_encrypt:
-	movdqu	%xmm0, (%r10)
-_return_T_done_encrypt:
-	mov	%r14, %rsp
-	pop	%r14
-	pop	%r13
-	pop	%r12
+/*****************************************************************************
+* void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
+*                    struct gcm_context_data *data,
+*                                        // context data
+*                    u8 *auth_tag,       // Authenticated Tag output.
+*                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
+*                                        // 12 or 8.
+*/
+ENTRY(aesni_gcm_finalize)
+	FUNC_SAVE
+	GCM_COMPLETE %arg3 %arg4
+	FUNC_RESTORE
 	ret
-ENDPROC(aesni_gcm_enc)
+ENDPROC(aesni_gcm_finalize)

 #endif


+ 206 - 24
arch/x86/crypto/aesni-intel_glue.c
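
The declarations added in this file split the one-shot GCM entry points into an init/update/finalize sequence driven by the new struct gcm_context_data. A minimal sketch of how a caller is expected to drive that sequence on a linear buffer, assuming the same kernel context as this file (kernel_fpu_begin/end, AESNI_ALIGN_ATTR and the asmlinkage declarations below); gcm_encrypt_linear() is a hypothetical helper, not part of the patch:

/* Hedged sketch only; signatures follow the asmlinkage declarations below. */
static void gcm_encrypt_linear(void *aes_ctx, u8 *iv, u8 *hash_subkey,
			       const u8 *aad, unsigned long aad_len,
			       u8 *dst, const u8 *src, unsigned long len,
			       u8 *tag, unsigned long tag_len)
{
	struct gcm_context_data data AESNI_ALIGN_ATTR;

	kernel_fpu_begin();
	/* one-time setup: hashes the AAD and records the pre-counter block */
	aesni_gcm_init(aes_ctx, &data, iv, hash_subkey, aad, aad_len);
	/* may be repeated for successive chunks of the plaintext */
	aesni_gcm_enc_update(aes_ctx, &data, dst, src, len);
	/* emits the authentication tag once all data has been fed in */
	aesni_gcm_finalize(aes_ctx, &data, tag, tag_len);
	kernel_fpu_end();
}

The context carried in gcm_context_data is what lets the update step be called repeatedly over scatter/gather segments, which is exactly how gcmaes_crypt_by_sg() below uses it.
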

@@ -72,6 +72,21 @@ struct aesni_xts_ctx {
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 };

+#define GCM_BLOCK_LEN 16
+
+struct gcm_context_data {
+	/* init, update and finalize context data */
+	u8 aad_hash[GCM_BLOCK_LEN];
+	u64 aad_length;
+	u64 in_length;
+	u8 partial_block_enc_key[GCM_BLOCK_LEN];
+	u8 orig_IV[GCM_BLOCK_LEN];
+	u8 current_counter[GCM_BLOCK_LEN];
+	u64 partial_block_len;
+	u64 unused;
+	u8 hash_keys[GCM_BLOCK_LEN * 8];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
 
 
 /* asmlinkage void aesni_gcm_enc()
  * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data.  May be uninitialized.
  * u8 *out, Ciphertext output. Encrypt in-place is allowed.
  * const u8 *in, Plaintext input
  * unsigned long plaintext_len, Length of data in bytes for encryption.
@@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
  * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
  *          Valid values are 16 (most likely), 12 or 8.
  */
-asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_enc(void *ctx,
+			struct gcm_context_data *gdata, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
 
 /* asmlinkage void aesni_gcm_dec()
  * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data.  May be uninitialized.
  * u8 *out, Plaintext output. Decrypt in-place is allowed.
  * const u8 *in, Ciphertext input
  * unsigned long ciphertext_len, Length of data in bytes for decryption.
@@ -137,11 +155,28 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
  * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
  * Valid values are 16 (most likely), 12 or 8.
  */
-asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_dec(void *ctx,
+			struct gcm_context_data *gdata, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
 
+/* Scatter / Gather routines, with args similar to above */
+asmlinkage void aesni_gcm_init(void *ctx,
+			       struct gcm_context_data *gdata,
+			       u8 *iv,
+			       u8 *hash_subkey, const u8 *aad,
+			       unsigned long aad_len);
+asmlinkage void aesni_gcm_enc_update(void *ctx,
+				     struct gcm_context_data *gdata, u8 *out,
+				     const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update(void *ctx,
+				     struct gcm_context_data *gdata, u8 *out,
+				     const u8 *in,
+				     unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize(void *ctx,
+				   struct gcm_context_data *gdata,
+				   u8 *auth_tag, unsigned long auth_tag_len);
 
 
 #ifdef CONFIG_AS_AVX
 asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
@@ -167,15 +202,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
 			const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
 
-static void aesni_gcm_enc_avx(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
         struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
-		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_enc(ctx, data, out, in,
+			plaintext_len, iv, hash_subkey, aad,
+			aad_len, auth_tag, auth_tag_len);
 	} else {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -183,15 +220,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
 	}
 }

-static void aesni_gcm_dec_avx(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
         struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_dec(ctx, data, out, in,
+			ciphertext_len, iv, hash_subkey, aad,
+			aad_len, auth_tag, auth_tag_len);
 	} else {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -218,15 +257,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
 			const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len);
 
 
-static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx2(void *ctx,
+			struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long plaintext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
        struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
-				aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_enc(ctx, data, out, in,
+			      plaintext_len, iv, hash_subkey, aad,
+			      aad_len, auth_tag, auth_tag_len);
 	} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -238,15 +279,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
 	}
 }
 
 
-static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx2(void *ctx,
+	struct gcm_context_data *data, u8 *out,
 			const u8 *in, unsigned long ciphertext_len, u8 *iv,
 			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
 			u8 *auth_tag, unsigned long auth_tag_len)
 {
        struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
 	if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
-		aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
-				aad, aad_len, auth_tag, auth_tag_len);
+		aesni_gcm_dec(ctx, data, out, in,
+			      ciphertext_len, iv, hash_subkey,
+			      aad, aad_len, auth_tag, auth_tag_len);
 	} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
 		aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
 		aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -259,15 +302,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
 }
 #endif
 
 
-static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
-			const u8 *in, unsigned long plaintext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_enc_tfm)(void *ctx,
+				 struct gcm_context_data *data, u8 *out,
+				 const u8 *in, unsigned long plaintext_len,
+				 u8 *iv, u8 *hash_subkey, const u8 *aad,
+				 unsigned long aad_len, u8 *auth_tag,
+				 unsigned long auth_tag_len);
 
 
-static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
-			const u8 *in, unsigned long ciphertext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_dec_tfm)(void *ctx,
+				 struct gcm_context_data *data, u8 *out,
+				 const u8 *in, unsigned long ciphertext_len,
+				 u8 *iv, u8 *hash_subkey, const u8 *aad,
+				 unsigned long aad_len, u8 *auth_tag,
+				 unsigned long auth_tag_len);
 
 
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
@@ -744,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
 	return 0;
 }
 
 
+static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
+			      unsigned int assoclen, u8 *hash_subkey,
+			      u8 *iv, void *aes_ctx)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
+	struct scatter_walk dst_sg_walk = {};
+	unsigned long left = req->cryptlen;
+	unsigned long len, srclen, dstlen;
+	struct scatter_walk assoc_sg_walk;
+	struct scatter_walk src_sg_walk;
+	struct scatterlist src_start[2];
+	struct scatterlist dst_start[2];
+	struct scatterlist *src_sg;
+	struct scatterlist *dst_sg;
+	u8 *src, *dst, *assoc;
+	u8 *assocmem = NULL;
+	u8 authTag[16];
+
+	if (!enc)
+		left -= auth_tag_len;
+
+	/* Linearize assoc, if not already linear */
+	if (req->src->length >= assoclen && req->src->length &&
+		(!PageHighMem(sg_page(req->src)) ||
+			req->src->offset + req->src->length < PAGE_SIZE)) {
+		scatterwalk_start(&assoc_sg_walk, req->src);
+		assoc = scatterwalk_map(&assoc_sg_walk);
+	} else {
+		/* assoc can be any length, so must be on heap */
+		assocmem = kmalloc(assoclen, GFP_ATOMIC);
+		if (unlikely(!assocmem))
+			return -ENOMEM;
+		assoc = assocmem;
+
+		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
+	}
+
+	src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
+	scatterwalk_start(&src_sg_walk, src_sg);
+	if (req->src != req->dst) {
+		dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen);
+		scatterwalk_start(&dst_sg_walk, dst_sg);
+	}
+
+	kernel_fpu_begin();
+	aesni_gcm_init(aes_ctx, &data, iv,
+		hash_subkey, assoc, assoclen);
+	if (req->src != req->dst) {
+		while (left) {
+			src = scatterwalk_map(&src_sg_walk);
+			dst = scatterwalk_map(&dst_sg_walk);
+			srclen = scatterwalk_clamp(&src_sg_walk, left);
+			dstlen = scatterwalk_clamp(&dst_sg_walk, left);
+			len = min(srclen, dstlen);
+			if (len) {
+				if (enc)
+					aesni_gcm_enc_update(aes_ctx, &data,
+							     dst, src, len);
+				else
+					aesni_gcm_dec_update(aes_ctx, &data,
+							     dst, src, len);
+			}
+			left -= len;
+
+			scatterwalk_unmap(src);
+			scatterwalk_unmap(dst);
+			scatterwalk_advance(&src_sg_walk, len);
+			scatterwalk_advance(&dst_sg_walk, len);
+			scatterwalk_done(&src_sg_walk, 0, left);
+			scatterwalk_done(&dst_sg_walk, 1, left);
+		}
+	} else {
+		while (left) {
+			dst = src = scatterwalk_map(&src_sg_walk);
+			len = scatterwalk_clamp(&src_sg_walk, left);
+			if (len) {
+				if (enc)
+					aesni_gcm_enc_update(aes_ctx, &data,
+							     src, src, len);
+				else
+					aesni_gcm_dec_update(aes_ctx, &data,
+							     src, src, len);
+			}
+			left -= len;
+			scatterwalk_unmap(src);
+			scatterwalk_advance(&src_sg_walk, len);
+			scatterwalk_done(&src_sg_walk, 1, left);
+		}
+	}
+	aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
+	kernel_fpu_end();
+
+	if (!assocmem)
+		scatterwalk_unmap(assoc);
+	else
+		kfree(assocmem);
+
+	if (!enc) {
+		u8 authTagMsg[16];
+
+		/* Copy out original authTag */
+		scatterwalk_map_and_copy(authTagMsg, req->src,
+					 req->assoclen + req->cryptlen -
+					 auth_tag_len,
+					 auth_tag_len, 0);
+
+		/* Compare generated tag with passed in tag. */
+		return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
+			-EBADMSG : 0;
+	}
+
+	/* Copy in the authTag */
+	scatterwalk_map_and_copy(authTag, req->dst,
+				 req->assoclen + req->cryptlen,
+				 auth_tag_len, 1);
+
+	return 0;
+}
+
 static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
@@ -753,7 +921,14 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
 
+	if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+		aesni_gcm_enc_tfm == aesni_gcm_enc ||
+		req->cryptlen < AVX_GEN2_OPTSIZE) {
+		return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
+					  aes_ctx);
+	}
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
 	    req->src->offset + req->src->length <= PAGE_SIZE) &&
@@ -782,7 +957,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 	}
 
 	kernel_fpu_begin();
-	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
+	aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv,
 			  hash_subkey, assoc, assoclen,
 			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
@@ -817,8 +992,15 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
+	struct gcm_context_data data AESNI_ALIGN_ATTR;
 	int retval = 0;
 
+	if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+		aesni_gcm_enc_tfm == aesni_gcm_enc ||
+		req->cryptlen < AVX_GEN2_OPTSIZE) {
+		return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
+					  aes_ctx);
+	}
 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
 
 	if (sg_is_last(req->src) &&
@@ -849,7 +1031,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 
 
 	kernel_fpu_begin();
-	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
+	aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv,
 			  hash_subkey, assoc, assoclen,
 			  authTag, auth_tag_len);
 	kernel_fpu_end();

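On the decrypt side, gcmaes_crypt_by_sg() above recomputes the tag with aesni_gcm_finalize() and compares it against the tag carried at the end of the ciphertext. A small sketch of that comparison in isolation, using only helpers that appear in this file (scatterwalk_map_and_copy, crypto_memneq); gcm_verify_tag() itself is a hypothetical name, not part of the patch:

/* Hedged illustration of the tag check at the end of the SG decrypt path. */
static int gcm_verify_tag(struct aead_request *req, unsigned long auth_tag_len,
			  const u8 *computed_tag)
{
	u8 wire_tag[16];

	/* the transmitted tag sits after assoc data + ciphertext in req->src */
	scatterwalk_map_and_copy(wire_tag, req->src,
				 req->assoclen + req->cryptlen - auth_tag_len,
				 auth_tag_len, 0);

	/* constant-time compare; -EBADMSG signals authentication failure */
	return crypto_memneq(wire_tag, computed_tag, auth_tag_len) ? -EBADMSG : 0;
}

Using crypto_memneq() rather than memcmp() keeps the comparison independent of where the first mismatching byte occurs, which is the same design choice the patch makes.
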
+ 113 - 117
arch/x86/crypto/blowfish_glue.c

@@ -25,13 +25,13 @@
  *
  *
  */
  */
 
 
-#include <asm/processor.h>
+#include <crypto/algapi.h>
 #include <crypto/blowfish.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
 
 
 /* regular block cipher functions */
 asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
@@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
+				    const u8 *key, unsigned int keylen)
+{
+	return blowfish_setkey(&tfm->base, key, keylen);
+}
+
+static int ecb_crypt(struct skcipher_request *req,
 		     void (*fn)(struct bf_ctx *, u8 *, const u8 *),
 		     void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;

-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 
 		/* Process four block batch */
 		/* Process four block batch */
 		if (nbytes >= bsize * 4) {
 		if (nbytes >= bsize * 4) {
@@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 		} while (nbytes >= bsize);
 
 
 done:
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
+	return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way);
 }
 }
 
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
+	return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way);
 }
 }
 
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_encrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -245,24 +244,25 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
-static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk)
 {
 {
 	u8 *ctrblk = walk->iv;
 	u8 *ctrblk = walk->iv;
 	u8 keystream[BF_BLOCK_SIZE];
 	u8 keystream[BF_BLOCK_SIZE];
@@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }
 }
 
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk)
 {
 {
-	struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int bsize = BF_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -332,29 +330,30 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
 	while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __ctr_crypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
-	if (walk.nbytes) {
-		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+	if (nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
-static struct crypto_alg bf_algs[4] = { {
+static struct crypto_alg bf_cipher_alg = {
 	.cra_name		= "blowfish",
 	.cra_name		= "blowfish",
 	.cra_driver_name	= "blowfish-asm",
 	.cra_driver_name	= "blowfish-asm",
 	.cra_priority		= 200,
 	.cra_priority		= 200,
@@ -372,66 +371,50 @@ static struct crypto_alg bf_algs[4] = { {
 			.cia_decrypt		= blowfish_decrypt,
 			.cia_decrypt		= blowfish_decrypt,
 		}
 		}
 	}
 	}
-}, {
-	.cra_name		= "ecb(blowfish)",
-	.cra_driver_name	= "ecb-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
+};
+
+static struct skcipher_alg bf_skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(blowfish)",
+		.base.cra_driver_name	= "ecb-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= BF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(blowfish)",
+		.base.cra_driver_name	= "cbc-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= BF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.ivsize			= BF_BLOCK_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(blowfish)",
+		.base.cra_driver_name	= "ctr-blowfish-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct bf_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= BF_MIN_KEY_SIZE,
+		.max_keysize		= BF_MAX_KEY_SIZE,
+		.ivsize			= BF_BLOCK_SIZE,
+		.chunksize		= BF_BLOCK_SIZE,
+		.setkey			= blowfish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
 	},
 	},
-}, {
-	.cra_name		= "cbc(blowfish)",
-	.cra_driver_name	= "cbc-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= BF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(blowfish)",
-	.cra_driver_name	= "ctr-blowfish-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct bf_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= BF_MIN_KEY_SIZE,
-			.max_keysize	= BF_MAX_KEY_SIZE,
-			.ivsize		= BF_BLOCK_SIZE,
-			.setkey		= blowfish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
 
 
 static bool is_blacklisted_cpu(void)
 static bool is_blacklisted_cpu(void)
 {
 {
@@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 
 static int __init init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"blowfish-x86_64: performance on this CPU "
@@ -464,12 +449,23 @@ static int __init init(void)
 		return -ENODEV;
 	}

-	return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
+	err = crypto_register_alg(&bf_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(bf_skcipher_algs,
+					ARRAY_SIZE(bf_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&bf_cipher_alg);
+
+	return err;
 }
 }
 
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
+	crypto_unregister_alg(&bf_cipher_alg);
+	crypto_unregister_skciphers(bf_skcipher_algs,
+				    ARRAY_SIZE(bf_skcipher_algs));
 }

 module_init(init);

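The blowfish conversion above illustrates the mechanical pattern repeated across this series: the legacy blkcipher_desc/scatterlist prototypes collapse into a single skcipher_request, and the walk loop keeps the same shape. A reduced sketch of that loop under the same assumptions as the code above; do_one_pass() is a hypothetical stand-in for the cipher-specific per-chunk work:

/* Hedged sketch of the skcipher walk pattern; do_one_pass() is hypothetical. */
static int example_skcipher_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	/* third argument is "atomic"; false lets the walk sleep if the request allows it */
	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		/* do_one_pass() consumes as much as it can and returns the remainder */
		nbytes = do_one_pass(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

The per-mode entry points (ecb_encrypt, cbc_decrypt, ctr_crypt and so on) then become thin wrappers around this loop, which is why each conversion in the diff shrinks rather than grows.
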
+ 99 - 392
arch/x86/crypto/camellia_aesni_avx2_glue.c

@@ -10,18 +10,15 @@
  *
  *
  */
  */
 
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 #include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
@@ -150,413 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = {
 	} }
 };
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
 {
 {
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+				 &tfm->base.crt_flags);
 }
 }
 
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 }
 
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
-	return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
-			      CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
-			      nbytes);
-}
-
-static inline void camellia_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
-}
-
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			   unsigned int key_len)
-{
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
-				 &tfm->crt_flags);
-}
-
-struct crypt_priv {
-	struct camellia_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-	}
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 }
 
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
-	}
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 }
 
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 }
 
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 }
 
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&camellia_enc_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&camellia_dec_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static struct crypto_alg cmll_algs[10] = { {
-	.cra_name		= "__ecb-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-ecb-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-cbc-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-ctr-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-lrw-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-camellia-aesni-avx2",
-	.cra_driver_name	= "__driver-xts-camellia-aesni-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-aesni-avx2",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg camellia_algs[] = {
+	{
+		.base.cra_name		= "__ecb(camellia)",
+		.base.cra_driver_name	= "__ecb-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(camellia)",
+		.base.cra_driver_name	= "__cbc-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(camellia)",
+		.base.cra_driver_name	= "__ctr-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(camellia)",
+		.base.cra_driver_name	= "__xts-camellia-aesni-avx2",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= xts_camellia_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
 	},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 
 static int __init camellia_aesni_init(void)
 {
@@ -576,12 +280,15 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	return simd_register_skciphers_compat(camellia_algs,
+					      ARRAY_SIZE(camellia_algs),
+					      camellia_simd_algs);
 }
 
 static void __exit camellia_aesni_fini(void)
 {
-	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+				  camellia_simd_algs);
 }
 
 module_init(camellia_aesni_init);

+ 118 - 377
arch/x86/crypto/camellia_aesni_avx_glue.c
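
As in the AVX2 glue above, this file drops the hand-rolled ablk_helper wrappers in favour of the simd helper: the __-prefixed CRYPTO_ALG_INTERNAL skciphers are registered once, and simd_register_skciphers_compat() builds the async wrappers that the user-visible "ecb(camellia)" and friends resolve to. A hedged sketch of that registration idiom, with my_algs/my_simd_algs/my_mod_init/my_mod_exit as placeholder names rather than anything in the patch:

/* Hedged sketch of the simd registration idiom used by these glue modules. */
/* my_algs[] stands for a module's array of internal skcipher_algs,          */
/* e.g. camellia_algs[] in the AVX2 glue above.                              */
static struct skcipher_alg my_algs[2];
static struct simd_skcipher_alg *my_simd_algs[ARRAY_SIZE(my_algs)];

static int __init my_mod_init(void)
{
	/* registers the internal algs and wraps each one in a simd skcipher */
	return simd_register_skciphers_compat(my_algs, ARRAY_SIZE(my_algs),
					      my_simd_algs);
}

static void __exit my_mod_exit(void)
{
	simd_unregister_skciphers(my_algs, ARRAY_SIZE(my_algs), my_simd_algs);
}

module_init(my_mod_init);
module_exit(my_mod_exit);

The simd wrapper defers FPU-using work when the FPU is unavailable, which is what lets the internal algorithms assume kernel_fpu_begin() has already been taken care of.
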

@@ -10,18 +10,15 @@
  *
  *
  */
  */
 
 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 #include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 
 #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16

@@ -154,401 +151,142 @@ static const struct common_glue_ctx camellia_dec_xts = {
 	} }
 };
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keylen)
 {
 {
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+				 &tfm->base.crt_flags);
 }
 }
 
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 }
 
 
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
-			      CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
-			      nbytes);
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 }
 
 
-static inline void camellia_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 }
 
 
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
-			   unsigned int key_len)
+int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			unsigned int keylen)
 {
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
-				 &tfm->crt_flags);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+				flags);
 }
+EXPORT_SYMBOL_GPL(xts_camellia_setkey);
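
An xts(camellia) key is twice the underlying key length; xts_verify_key() rejects malformed keys (e.g. odd lengths) and the setkey above then uses one half per sub-context. A purely illustrative helper (not in the patch) showing the assumed layout, e.g. a 64-byte key yielding two 32-byte Camellia subkeys:

static void example_xts_split(const u8 *key, unsigned int keylen,
			      const u8 **crypt_key, const u8 **tweak_key)
{
	*crypt_key = key;		/* first half encrypts the data */
	*tweak_key = key + keylen / 2;	/* second half encrypts the tweak */
}
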
 
 
-struct crypt_priv {
-	struct camellia_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&camellia_enc_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
-		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
-		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
-		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&camellia_dec_xts, req,
+				   XTS_TWEAK_CAST(camellia_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->camellia_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	camellia_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(camellia_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg cmll_algs[10] = { {
-	.cra_name		= "__ecb-camellia-aesni",
-	.cra_driver_name	= "__driver-ecb-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-camellia-aesni",
-	.cra_driver_name	= "__driver-cbc-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-camellia-aesni",
-	.cra_driver_name	= "__driver-ctr-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-camellia-aesni",
-	.cra_driver_name	= "__driver-lrw-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-camellia-aesni",
-	.cra_driver_name	= "__driver-xts-camellia-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-					  CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg camellia_algs[] = {
+	{
+		.base.cra_name		= "__ecb(camellia)",
+		.base.cra_driver_name	= "__ecb-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(camellia)",
+		.base.cra_driver_name	= "__cbc-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(camellia)",
+		.base.cra_driver_name	= "__ctr-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(camellia)",
+		.base.cra_driver_name	= "__xts-camellia-aesni",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= xts_camellia_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
 
 
 static int __init camellia_aesni_init(void)
 {
@@ -567,12 +305,15 @@ static int __init camellia_aesni_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	return simd_register_skciphers_compat(camellia_algs,
+					      ARRAY_SIZE(camellia_algs),
+					      camellia_simd_algs);
 }
 
 static void __exit camellia_aesni_fini(void)
 {
-	crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+				  camellia_simd_algs);
 }
 
 module_init(camellia_aesni_init);
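
simd_register_skciphers_compat() registers the internal "__" algorithms and then, for each of them, creates an async simd wrapper that re-exports the name without the prefix and defers to cryptd when the FPU cannot be used directly. Roughly, per algorithm, it does something like the following simplified sketch (the real logic lives in crypto/simd.c and also handles error unwinding):

static int example_wrap_with_simd(struct skcipher_alg *algs, int count,
				  struct simd_skcipher_alg **simd_algs)
{
	int i;

	for (i = 0; i < count; i++) {
		/* public name and driver name drop the "__" prefix */
		const char *algname = algs[i].base.cra_name + 2;
		const char *drvname = algs[i].base.cra_driver_name + 2;
		const char *basename = algs[i].base.cra_driver_name;

		simd_algs[i] = simd_skcipher_create_compat(algname, drvname,
							   basename);
		if (IS_ERR(simd_algs[i]))
			return PTR_ERR(simd_algs[i]);
	}

	return 0;
}
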

+ 79 - 277
arch/x86/crypto/camellia_glue.c

@@ -23,15 +23,12 @@
  *
  */
 
 
-#include <asm/processor.h>
 #include <asm/unaligned.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 
 
@@ -1272,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
 }
 EXPORT_SYMBOL_GPL(__camellia_setkey);
 
 
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int key_len)
 {
-	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+	return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len,
 				 &tfm->crt_flags);
 }
 
 
+static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				    unsigned int key_len)
+{
+	return camellia_setkey(&tfm->base, key, key_len);
+}
+
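
The new skcipher setkey callback can reuse the existing crypto_tfm based setkey because struct crypto_skcipher embeds the legacy tfm as its .base member. The same pattern for a hypothetical cipher (example_setkey() is a placeholder, not from the patch):

static int example_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				   unsigned int keylen)
{
	/* crypto_skcipher_tfm(tfm) is equivalent to &tfm->base */
	return example_setkey(crypto_skcipher_tfm(tfm), key, keylen);
}
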
 void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
 {
 	u128 iv = *src;
@@ -1373,188 +1376,33 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 	} }
 };
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct camellia_ctx *ctx = priv;
-	int i;
-
-	while (nbytes >= 2 * bsize) {
-		camellia_enc_blk_2way(ctx, srcdst, srcdst);
-		srcdst += bsize * 2;
-		nbytes -= bsize * 2;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	struct camellia_ctx *ctx = priv;
-	int i;
-
-	while (nbytes >= 2 * bsize) {
-		camellia_dec_blk_2way(ctx, srcdst, srcdst);
-		srcdst += bsize * 2;
-		nbytes -= bsize * 2;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		camellia_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __camellia_setkey(&ctx->camellia_ctx, key,
-				keylen - CAMELLIA_BLOCK_SIZE,
-				&tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table,
-			      key + keylen - CAMELLIA_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[2 * 4];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->camellia_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[2 * 4];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->camellia_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_ecb_req_128bit(&camellia_enc, req);
 }
 }
 
 
-void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
+	return glue_ecb_req_128bit(&camellia_dec, req);
 }
 }
-EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
 
 
-int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			unsigned int keylen)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+					   req);
 }
 }
-EXPORT_SYMBOL_GPL(xts_camellia_setkey);
 
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[2 * 4];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
 }
 }
 
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[2 * 4];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_ctr_req_128bit(&camellia_ctr, req);
 }
 }
 
 
-static struct crypto_alg camellia_algs[6] = { {
+static struct crypto_alg camellia_cipher_alg = {
 	.cra_name		= "camellia",
 	.cra_driver_name	= "camellia-asm",
 	.cra_priority		= 200,
@@ -1572,109 +1420,50 @@ static struct crypto_alg camellia_algs[6] = { {
 			.cia_decrypt	 = camellia_decrypt
 		}
 	}
-}, {
-	.cra_name		= "ecb(camellia)",
-	.cra_driver_name	= "ecb-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(camellia)",
-	.cra_driver_name	= "cbc-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(camellia)",
-	.cra_driver_name	= "ctr-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct camellia_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= camellia_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(camellia)",
-	.cra_driver_name	= "lrw-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_camellia_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE +
-						CAMELLIA_BLOCK_SIZE,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE +
-						CAMELLIA_BLOCK_SIZE,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= lrw_camellia_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(camellia)",
-	.cra_driver_name	= "xts-camellia-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct camellia_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAMELLIA_MAX_KEY_SIZE * 2,
-			.ivsize		= CAMELLIA_BLOCK_SIZE,
-			.setkey		= xts_camellia_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-} };
+};
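
The single-block "camellia" implementation above stays a plain cipher (struct crypto_alg), driven through the one-block cipher API rather than the skcipher API. A consumer-side sketch, illustrative only (key and buffers are placeholders):

static int example_camellia_one_block(const u8 key[32],
				      const u8 in[CAMELLIA_BLOCK_SIZE],
				      u8 out[CAMELLIA_BLOCK_SIZE])
{
	struct crypto_cipher *tfm;
	int err;

	tfm = crypto_alloc_cipher("camellia", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_cipher_setkey(tfm, key, 32);
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in);

	crypto_free_cipher(tfm);
	return err;
}
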
+
+static struct skcipher_alg camellia_skcipher_algs[] = {
+	{
+		.base.cra_name		= "ecb(camellia)",
+		.base.cra_driver_name	= "ecb-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(camellia)",
+		.base.cra_driver_name	= "cbc-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(camellia)",
+		.base.cra_driver_name	= "ctr-camellia-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
+		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
+		.ivsize			= CAMELLIA_BLOCK_SIZE,
+		.chunksize		= CAMELLIA_BLOCK_SIZE,
+		.setkey			= camellia_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 
 static bool is_blacklisted_cpu(void)
 {
@@ -1700,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 
 static int __init init(void)
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 		printk(KERN_INFO
 			"camellia-x86_64: performance on this CPU "
@@ -1708,12 +1499,23 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
 
-	return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+	err = crypto_register_alg(&camellia_cipher_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(camellia_skcipher_algs,
+					ARRAY_SIZE(camellia_skcipher_algs));
+	if (err)
+		crypto_unregister_alg(&camellia_cipher_alg);
+
+	return err;
 }
 
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+	crypto_unregister_alg(&camellia_cipher_alg);
+	crypto_unregister_skciphers(camellia_skcipher_algs,
+				    ARRAY_SIZE(camellia_skcipher_algs));
 }
 
 module_init(init);
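
Since the module now registers one cipher plus an array of skciphers, init() has to unregister the cipher if the skcipher registration fails, as shown above. The same error path written with goto-based unwinding, purely as an illustration of the unwinding order (the CPU blacklist check is omitted here):

static int __init example_init(void)
{
	int err;

	err = crypto_register_alg(&camellia_cipher_alg);
	if (err)
		return err;

	err = crypto_register_skciphers(camellia_skcipher_algs,
					ARRAY_SIZE(camellia_skcipher_algs));
	if (err)
		goto out_unregister_cipher;

	return 0;

out_unregister_cipher:
	crypto_unregister_alg(&camellia_cipher_alg);
	return err;
}
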

+ 127 - 225
arch/x86/crypto/cast5_avx_glue.c

@@ -21,18 +21,14 @@
  *
  */
 
 
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/cast5.h>
-#include <crypto/cryptd.h>
-#include <crypto/ctr.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/glue_helper.h>
+#include <crypto/internal/simd.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
 
 
 #define CAST5_PARALLEL_BLOCKS 16
 
 
@@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
 asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
 				__be64 *iv);
 
 
-static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	return cast5_setkey(&tfm->base, key, keylen);
+}
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
+				   unsigned int nbytes)
 {
 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+			      walk, fpu_enabled, nbytes);
 }
 }
 
 
 static inline void cast5_fpu_end(bool fpu_enabled)
 static inline void cast5_fpu_end(bool fpu_enabled)
@@ -56,29 +59,28 @@ static inline void cast5_fpu_end(bool fpu_enabled)
 	return glue_fpu_end(fpu_enabled);
 	return glue_fpu_end(fpu_enabled);
 }
 }
 
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     bool enc)
+static int ecb_crypt(struct skcipher_request *req, bool enc)
 {
 {
 	bool fpu_enabled = false;
 	bool fpu_enabled = false;
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes;
 	unsigned int nbytes;
 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
 	int err;
 	int err;
 
 
-	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
-
-	err = blkcipher_walk_virt(desc, walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
 
 
 		/* Process multi-block batch */
 		/* Process multi-block batch */
 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
 			do {
 			do {
 				fn(ctx, wdst, wsrc);
 				fn(ctx, wdst, wsrc);
 
 
@@ -103,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 	return err;
 }
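
ecb_crypt() above shows the canonical shape of a skcipher_walk loop: map the next contiguous span, process whole blocks, then report the unprocessed remainder to skcipher_walk_done(). A stripped-down sketch with hypothetical example_* names (not the real cast5 primitives):

static int example_ecb_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct example_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		while (nbytes >= EXAMPLE_BLOCK_SIZE) {
			example_encrypt_block(ctx, dst, src);	/* hypothetical */
			src += EXAMPLE_BLOCK_SIZE;
			dst += EXAMPLE_BLOCK_SIZE;
			nbytes -= EXAMPLE_BLOCK_SIZE;
		}

		/* hand any unprocessed tail back to the walk */
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
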
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, true);
+	return ecb_crypt(req, true);
 }
 }
 
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, false);
+	return ecb_crypt(req, false);
 }
 }
 
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u64 *src = (u64 *)walk->src.virt.addr;
-	u64 *dst = (u64 *)walk->dst.virt.addr;
-	u64 *iv = (u64 *)walk->iv;
-
-	do {
-		*dst = *src ^ *iv;
-		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	*(u64 *)walk->iv = *iv;
-	return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u64 *src = (u64 *)walk.src.virt.addr;
+		u64 *dst = (u64 *)walk.dst.virt.addr;
+		u64 *iv = (u64 *)walk.iv;
+
+		do {
+			*dst = *src ^ *iv;
+			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+			iv = dst;
+			src++;
+			dst++;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+		*(u64 *)walk.iv = *iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
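
cbc_encrypt() stays a scalar loop because CBC encryption is inherently serial: C[i] = E(K, P[i] ^ C[i-1]) with C[-1] = IV, so each block needs the previous ciphertext. CBC decryption, by contrast, can use the 16-way path because all ciphertext blocks are already available. A sketch of the recurrence with hypothetical helpers (not the cast5 code):

static void example_cbc_encrypt_serial(const struct example_ctx *ctx, u8 *dst,
				       const u8 *src, unsigned int nblocks,
				       const u8 *iv)
{
	unsigned int i;

	for (i = 0; i < nblocks; i++) {
		crypto_xor_cpy(dst, src, iv, EXAMPLE_BLOCK_SIZE);
		example_encrypt_block(ctx, dst, dst);	/* hypothetical */
		iv = dst;				/* chain into the next block */
		src += EXAMPLE_BLOCK_SIZE;
		dst += EXAMPLE_BLOCK_SIZE;
	}
}
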
 
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -224,31 +208,29 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	bool fpu_enabled = false;
 	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
 	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
 {
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	u8 *ctrblk = walk->iv;
 	u8 *ctrblk = walk->iv;
 	u8 keystream[CAST5_BLOCK_SIZE];
 	u8 keystream[CAST5_BLOCK_SIZE];
 	u8 *src = walk->src.virt.addr;
 	u8 *src = walk->src.virt.addr;
@@ -261,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }
 
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct skcipher_walk *walk,
+				struct cast5_ctx *ctx)
 {
 {
-	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -307,162 +288,80 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
 	bool fpu_enabled = false;
 	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+		nbytes = __ctr_crypt(&walk, ctx);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	cast5_fpu_end(fpu_enabled);
 
 
 	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		ctr_crypt_final(&walk, ctx);
+		err = skcipher_walk_done(&walk, 0);
 	}
 
 	return err;
 }
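
The trailing walk.nbytes < CAST5_BLOCK_SIZE case above is the CTR partial block: the counter block is encrypted once more and only the remaining bytes are XORed with that keystream, as ctr_crypt_final() does. A sketch with hypothetical helpers:

static void example_ctr_final(const struct example_ctx *ctx,
			      struct skcipher_walk *walk)
{
	u8 keystream[EXAMPLE_BLOCK_SIZE];
	const u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;	/* < EXAMPLE_BLOCK_SIZE */

	example_encrypt_block(ctx, keystream, walk->iv);	/* hypothetical */
	crypto_xor_cpy(dst, src, keystream, nbytes);
	crypto_inc(walk->iv, EXAMPLE_BLOCK_SIZE);
}
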
 
 
+static struct skcipher_alg cast5_algs[] = {
+	{
+		.base.cra_name		= "__ecb(cast5)",
+		.base.cra_driver_name	= "__ecb-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(cast5)",
+		.base.cra_driver_name	= "__cbc-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.ivsize			= CAST5_BLOCK_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(cast5)",
+		.base.cra_driver_name	= "__ctr-cast5-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST5_MIN_KEY_SIZE,
+		.max_keysize		= CAST5_MAX_KEY_SIZE,
+		.ivsize			= CAST5_BLOCK_SIZE,
+		.chunksize		= CAST5_BLOCK_SIZE,
+		.setkey			= cast5_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 
-static struct crypto_alg cast5_algs[6] = { {
-	.cra_name		= "__ecb-cast5-avx",
-	.cra_driver_name	= "__driver-ecb-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-cast5-avx",
-	.cra_driver_name	= "__driver-cbc-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-cast5-avx",
-	.cra_driver_name	= "__driver-ctr-cast5-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct cast5_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= cast5_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(cast5)",
-	.cra_driver_name	= "ecb-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(cast5)",
-	.cra_driver_name	= "cbc-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST5_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(cast5)",
-	.cra_driver_name	= "ctr-cast5-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST5_MIN_KEY_SIZE,
-			.max_keysize	= CAST5_MAX_KEY_SIZE,
-			.ivsize		= CAST5_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-} };
+static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
 
 
 static int __init cast5_init(void)
 {
@@ -474,12 +373,15 @@ static int __init cast5_init(void)
 		return -ENODEV;
 	}
 
 
-	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+	return simd_register_skciphers_compat(cast5_algs,
+					      ARRAY_SIZE(cast5_algs),
+					      cast5_simd_algs);
 }
 
 static void __exit cast5_exit(void)
 {
-	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
+				  cast5_simd_algs);
 }
 
 module_init(cast5_init);
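
After this conversion the "ecb(cast5)", "cbc(cast5)" and "ctr(cast5)" names resolve to the simd wrappers whenever AVX is usable. A throwaway diagnostic (not part of the patch) for checking which driver actually backs a given name:

static void example_report_cast5_driver(void)
{
	struct crypto_skcipher *tfm;

	tfm = crypto_alloc_skcipher("cbc(cast5)", 0, 0);
	if (IS_ERR(tfm))
		return;

	pr_info("cbc(cast5) backed by %s\n",
		crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)));

	crypto_free_skcipher(tfm);
}
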

+ 100 - 389
arch/x86/crypto/cast6_avx_glue.c

@@ -24,19 +24,13 @@
  */
 
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/cast6.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
+#include <crypto/internal/simd.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
 #include <asm/crypto/glue_helper.h>
 
 
 #define CAST6_PARALLEL_BLOCKS 8
@@ -56,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
 asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
 				   const u8 *src, le128 *iv);
 
 
+static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
+				 const u8 *key, unsigned int keylen)
+{
+	return cast6_setkey(&tfm->base, key, keylen);
+}
+
 static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
@@ -157,164 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = {
 	} }
 };
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&cast6_enc, req);
 }
 }
 
 
-static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+	return glue_ecb_req_128bit(&cast6_dec, req);
 }
 }
 
 
-static inline void cast6_fpu_end(bool fpu_enabled)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
+					   req);
 }
 }
 
 
-struct crypt_priv {
-	struct cast6_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = CAST6_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
-		cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+	return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req);
 }
 }
 
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = CAST6_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
-		cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct cast6_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct cast6_ctx cast6_ctx;
-};
-
-static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
-			     &tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAST6_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->cast6_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	cast6_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[CAST6_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->cast6_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	cast6_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
+	return glue_ctr_req_128bit(&cast6_ctr, req);
 }
 }
 
 
 struct cast6_xts_ctx {
 struct cast6_xts_ctx {
@@ -322,14 +188,14 @@ struct cast6_xts_ctx {
 	struct cast6_ctx crypt_ctx;
 };
 
 
-static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
+static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			    unsigned int keylen)
 {
 {
-	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
 	int err;
 	int err;
 
 
-	err = xts_check_key(tfm, key, keylen);
+	err = xts_verify_key(tfm, key, keylen);
 	if (err)
 		return err;
 
 
@@ -343,245 +209,87 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
 			      flags);
 }
 
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
 {
-	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__cast6_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&cast6_enc_xts, req,
+				   XTS_TWEAK_CAST(__cast6_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
 {
-	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__cast6_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
+	return glue_xts_req_128bit(&cast6_dec_xts, req,
+				   XTS_TWEAK_CAST(__cast6_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
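
For reference, glue_xts_req_128bit() implements the usual XTS recurrence, with the tweak key (the second half of the key) run through the tweak function XTS_TWEAK_CAST(__cast6_encrypt): T[0] = E_K2(IV), T[i+1] = T[i] * x in GF(2^128), and C[i] = E_K1(P[i] ^ T[i]) ^ T[i]. A per-block sketch with hypothetical helpers (the kernel's gf128mul helpers provide the doubling step):

static void example_xts_one_block(const struct example_ctx *crypt_ctx,
				  u8 *dst, const u8 *src, u8 tweak[16])
{
	u8 tmp[16];

	crypto_xor_cpy(tmp, src, tweak, 16);
	example_encrypt_block(crypt_ctx, tmp, tmp);	/* hypothetical */
	crypto_xor_cpy(dst, tmp, tweak, 16);

	example_gf128_double(tweak);	/* hypothetical: tweak *= x in GF(2^128) */
}
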
 
 
-static struct crypto_alg cast6_algs[10] = { {
-	.cra_name		= "__ecb-cast6-avx",
-	.cra_driver_name	= "__driver-ecb-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-cast6-avx",
-	.cra_driver_name	= "__driver-cbc-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-cast6-avx",
-	.cra_driver_name	= "__driver-ctr-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct cast6_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= cast6_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-cast6-avx",
-	.cra_driver_name	= "__driver-lrw-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= lrw_cast6_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-cast6-avx",
-	.cra_driver_name	= "__driver-xts-cast6-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= xts_cast6_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(cast6)",
-	.cra_driver_name	= "ecb-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(cast6)",
-	.cra_driver_name	= "cbc-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(cast6)",
-	.cra_driver_name	= "ctr-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(cast6)",
-	.cra_driver_name	= "lrw-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.max_keysize	= CAST6_MAX_KEY_SIZE +
-					  CAST6_BLOCK_SIZE,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(cast6)",
-	.cra_driver_name	= "xts-cast6-avx",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= CAST6_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
-			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
-			.ivsize		= CAST6_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg cast6_algs[] = {
+	{
+		.base.cra_name		= "__ecb(cast6)",
+		.base.cra_driver_name	= "__ecb-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(cast6)",
+		.base.cra_driver_name	= "__cbc-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(cast6)",
+		.base.cra_driver_name	= "__ctr-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct cast6_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= CAST6_MIN_KEY_SIZE,
+		.max_keysize		= CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.chunksize		= CAST6_BLOCK_SIZE,
+		.setkey			= cast6_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(cast6)",
+		.base.cra_driver_name	= "__xts-cast6-avx",
+		.base.cra_priority	= 200,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= CAST6_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct cast6_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * CAST6_MIN_KEY_SIZE,
+		.max_keysize		= 2 * CAST6_MAX_KEY_SIZE,
+		.ivsize			= CAST6_BLOCK_SIZE,
+		.setkey			= xts_cast6_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
 	},
-} };
+};
+
+static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
 
 
 static int __init cast6_init(void)
 static int __init cast6_init(void)
 {
 {
@@ -593,12 +301,15 @@ static int __init cast6_init(void)
 		return -ENODEV;
 		return -ENODEV;
 	}
 	}
 
 
-	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+	return simd_register_skciphers_compat(cast6_algs,
+					      ARRAY_SIZE(cast6_algs),
+					      cast6_simd_algs);
 }
 }
 
 
 static void __exit cast6_exit(void)
 static void __exit cast6_exit(void)
 {
 {
-	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+	simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs),
+				  cast6_simd_algs);
 }
 }
 
 
 module_init(cast6_init);
 module_init(cast6_init);
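With the cast6 conversion in place, the algorithms above are reachable through the normal skcipher request API. A minimal sketch of how another kernel user might drive one of them, assuming a block-aligned kmalloc'ed buffer and a key length that xts(cast6) accepts; the function name and buffer handling are illustrative only:

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/* Illustrative only: encrypt @len bytes in place with xts(cast6).
 * @len must be a multiple of CAST6_BLOCK_SIZE and @keylen twice a
 * valid cast6 key length, as enforced by xts_cast6_setkey() above. */
static int example_xts_cast6_encrypt(u8 *buf, unsigned int len,
				     const u8 *key, unsigned int keylen,
				     u8 iv[16])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xts(cast6)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req,
				      CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	/* The simd wrapper may complete asynchronously via cryptd. */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

Because the simd wrapper can punt work to cryptd, completion is waited for with crypto_wait_req() rather than assumed to be synchronous.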

arch/x86/crypto/des3_ede_glue.c  (+119, -119)

@@ -20,13 +20,13 @@
  *
  *
  */
  */
 
 
-#include <asm/processor.h>
+#include <crypto/algapi.h>
 #include <crypto/des.h>
 #include <crypto/des.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
 
 
 struct des3_ede_x86_ctx {
 struct des3_ede_x86_ctx {
 	u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
 	u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
@@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 	des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }
 }
 
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     const u32 *expkey)
+static int ecb_crypt(struct skcipher_request *req, const u32 *expkey)
 {
 {
-	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+	const unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+	struct skcipher_walk walk;
 	unsigned int nbytes;
 	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		u8 *wsrc = walk.src.virt.addr;
+		u8 *wdst = walk.dst.virt.addr;
 
 
 		/* Process four block batch */
 		/* Process four block batch */
 		if (nbytes >= bsize * 3) {
 		if (nbytes >= bsize * 3) {
@@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		} while (nbytes >= bsize);
 		} while (nbytes >= bsize);
 
 
 done:
 done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, ctx->enc_expkey);
+	return ecb_crypt(req, ctx->enc_expkey);
 }
 }
 
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, ctx->dec_expkey);
+	return ecb_crypt(req, ctx->dec_expkey);
 }
 }
 
 
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_encrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx,
+				  struct skcipher_walk *walk)
 {
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	u64 *src = (u64 *)walk->src.virt.addr;
 	u64 *src = (u64 *)walk->src.virt.addr;
@@ -250,25 +245,26 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __cbc_decrypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	return err;
 	return err;
 }
 }
 
 
 static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
-			    struct blkcipher_walk *walk)
+			    struct skcipher_walk *walk)
 {
 {
 	u8 *ctrblk = walk->iv;
 	u8 *ctrblk = walk->iv;
 	u8 keystream[DES3_EDE_BLOCK_SIZE];
 	u8 keystream[DES3_EDE_BLOCK_SIZE];
@@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
 }
 
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx,
+				struct skcipher_walk *walk)
 {
 {
-	struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int bsize = DES3_EDE_BLOCK_SIZE;
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 	__be64 *src = (__be64 *)walk->src.virt.addr;
 	__be64 *src = (__be64 *)walk->src.virt.addr;
@@ -333,23 +328,24 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
 	while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = __ctr_crypt(ctx, &walk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
-	if (walk.nbytes) {
-		ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+	if (nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	}
 
 
 	return err;
 	return err;
@@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
 	return 0;
 	return 0;
 }
 }
 
 
-static struct crypto_alg des3_ede_algs[4] = { {
+static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm,
+					const u8 *key,
+					unsigned int keylen)
+{
+	return des3_ede_x86_setkey(&tfm->base, key, keylen);
+}
+
+static struct crypto_alg des3_ede_cipher = {
 	.cra_name		= "des3_ede",
 	.cra_name		= "des3_ede",
 	.cra_driver_name	= "des3_ede-asm",
 	.cra_driver_name	= "des3_ede-asm",
 	.cra_priority		= 200,
 	.cra_priority		= 200,
@@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { {
 			.cia_decrypt		= des3_ede_x86_decrypt,
 			.cia_decrypt		= des3_ede_x86_decrypt,
 		}
 		}
 	}
 	}
-}, {
-	.cra_name		= "ecb(des3_ede)",
-	.cra_driver_name	= "ecb-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(des3_ede)",
-	.cra_driver_name	= "cbc-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(des3_ede)",
-	.cra_driver_name	= "ctr-des3_ede-asm",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct des3_ede_x86_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= DES3_EDE_KEY_SIZE,
-			.max_keysize	= DES3_EDE_KEY_SIZE,
-			.ivsize		= DES3_EDE_BLOCK_SIZE,
-			.setkey		= des3_ede_x86_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-} };
+};
+
+static struct skcipher_alg des3_ede_skciphers[] = {
+	{
+		.base.cra_name		= "ecb(des3_ede)",
+		.base.cra_driver_name	= "ecb-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(des3_ede)",
+		.base.cra_driver_name	= "cbc-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(des3_ede)",
+		.base.cra_driver_name	= "ctr-des3_ede-asm",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct des3_ede_x86_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= DES3_EDE_KEY_SIZE,
+		.max_keysize		= DES3_EDE_KEY_SIZE,
+		.ivsize			= DES3_EDE_BLOCK_SIZE,
+		.chunksize		= DES3_EDE_BLOCK_SIZE,
+		.setkey			= des3_ede_x86_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}
+};
 
 
 static bool is_blacklisted_cpu(void)
 static bool is_blacklisted_cpu(void)
 {
 {
@@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
 
 
 static int __init des3_ede_x86_init(void)
 static int __init des3_ede_x86_init(void)
 {
 {
+	int err;
+
 	if (!force && is_blacklisted_cpu()) {
 	if (!force && is_blacklisted_cpu()) {
 		pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
 		pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
 		return -ENODEV;
 		return -ENODEV;
 	}
 	}
 
 
-	return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+	err = crypto_register_alg(&des3_ede_cipher);
+	if (err)
+		return err;
+
+	err = crypto_register_skciphers(des3_ede_skciphers,
+					ARRAY_SIZE(des3_ede_skciphers));
+	if (err)
+		crypto_unregister_alg(&des3_ede_cipher);
+
+	return err;
 }
 }
 
 
 static void __exit des3_ede_x86_fini(void)
 static void __exit des3_ede_x86_fini(void)
 {
 {
-	crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+	crypto_unregister_alg(&des3_ede_cipher);
+	crypto_unregister_skciphers(des3_ede_skciphers,
+				    ARRAY_SIZE(des3_ede_skciphers));
 }
 }
 
 
 module_init(des3_ede_x86_init);
 module_init(des3_ede_x86_init);
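All of the ECB/CBC/CTR conversions in this file follow the same skcipher_walk shape. Condensed into a standalone sketch, with process_block() standing in for the driver's real assembler routine (it is not an actual kernel symbol):

#include <crypto/internal/skcipher.h>

/* Generic shape of the skcipher_walk loop used by the converted
 * handlers above; process_block() is a stand-in for the per-block
 * cipher routine the real driver calls. */
static int example_ecb_crypt(struct skcipher_request *req,
			     unsigned int bsize,
			     void (*process_block)(void *ctx, u8 *dst,
						   const u8 *src))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	void *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	/* false: the walk code is allowed to sleep. */
	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		while (nbytes >= bsize) {
			process_block(ctx, dst, src);
			src += bsize;
			dst += bsize;
			nbytes -= bsize;
		}

		/* Hand any unprocessed tail back to the walk code. */
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

skcipher_walk_done() copies back any partially mapped data and advances the source and destination scatterlists, which is why the leftover byte count, not an error code, is passed on each iteration.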

arch/x86/crypto/glue_helper.c  (+125, -266)

@@ -29,313 +29,212 @@
 #include <crypto/b128ops.h>
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/skcipher.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 #include <asm/crypto/glue_helper.h>
 
 
-static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-				   struct blkcipher_desc *desc,
-				   struct blkcipher_walk *walk)
+int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req)
 {
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes, i, func_bytes;
+	struct skcipher_walk walk;
 	bool fpu_enabled = false;
 	bool fpu_enabled = false;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	err = blkcipher_walk_virt(desc, walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
+	while ((nbytes = walk.nbytes)) {
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes;
+		unsigned int i;
 
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-
+					     &walk, fpu_enabled, nbytes);
 		for (i = 0; i < gctx->num_funcs; i++) {
 		for (i = 0; i < gctx->num_funcs; i++) {
 			func_bytes = bsize * gctx->funcs[i].num_blocks;
 			func_bytes = bsize * gctx->funcs[i].num_blocks;
 
 
-			/* Process multi-block batch */
-			if (nbytes >= func_bytes) {
-				do {
-					gctx->funcs[i].fn_u.ecb(ctx, wdst,
-								wsrc);
+			if (nbytes < func_bytes)
+				continue;
 
 
-					wsrc += func_bytes;
-					wdst += func_bytes;
-					nbytes -= func_bytes;
-				} while (nbytes >= func_bytes);
+			/* Process multi-block batch */
+			do {
+				gctx->funcs[i].fn_u.ecb(ctx, dst, src);
+				src += func_bytes;
+				dst += func_bytes;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
 
 
-				if (nbytes < bsize)
-					goto done;
-			}
+			if (nbytes < bsize)
+				break;
 		}
 		}
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	glue_fpu_end(fpu_enabled);
 	glue_fpu_end(fpu_enabled);
 	return err;
 	return err;
 }
 }
+EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
 
 
-int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
+int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+				struct skcipher_request *req)
 {
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
-}
-EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
-
-static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-					      struct blkcipher_desc *desc,
-					      struct blkcipher_walk *walk)
-{
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		fn(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	*(u128 *)walk->iv = *iv;
-	return nbytes;
-}
-
-int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-			    struct blkcipher_desc *desc,
-			    struct scatterlist *dst,
-			    struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 	int err;
 
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
 	while ((nbytes = walk.nbytes)) {
 	while ((nbytes = walk.nbytes)) {
-		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		const u128 *src = (u128 *)walk.src.virt.addr;
+		u128 *dst = (u128 *)walk.dst.virt.addr;
+		u128 *iv = (u128 *)walk.iv;
+
+		do {
+			u128_xor(dst, src, iv);
+			fn(ctx, (u8 *)dst, (u8 *)dst);
+			iv = dst;
+			src++;
+			dst++;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+		*(u128 *)walk.iv = *iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
-
 	return err;
 	return err;
 }
 }
-EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit);
 
 
-static unsigned int
-__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc,
-			  struct blkcipher_walk *walk)
+int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+				struct skcipher_request *req)
 {
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	const unsigned int bsize = 128 / 8;
 	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 last_iv;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
+	while ((nbytes = walk.nbytes)) {
+		const u128 *src = walk.src.virt.addr;
+		u128 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes, num_blocks;
+		unsigned int i;
+		u128 last_iv;
 
 
-	last_iv = *src;
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled, nbytes);
+		/* Start of the last block. */
+		src += nbytes / bsize - 1;
+		dst += nbytes / bsize - 1;
 
 
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
+		last_iv = *src;
 
 
-		/* Process multi-block batch */
-		if (nbytes >= func_bytes) {
+		for (i = 0; i < gctx->num_funcs; i++) {
+			num_blocks = gctx->funcs[i].num_blocks;
+			func_bytes = bsize * num_blocks;
+
+			if (nbytes < func_bytes)
+				continue;
+
+			/* Process multi-block batch */
 			do {
 			do {
-				nbytes -= func_bytes - bsize;
 				src -= num_blocks - 1;
 				src -= num_blocks - 1;
 				dst -= num_blocks - 1;
 				dst -= num_blocks - 1;
 
 
 				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
 				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
 
 
-				nbytes -= bsize;
+				nbytes -= func_bytes;
 				if (nbytes < bsize)
 				if (nbytes < bsize)
 					goto done;
 					goto done;
 
 
-				u128_xor(dst, dst, src - 1);
-				src -= 1;
-				dst -= 1;
+				u128_xor(dst, dst, --src);
+				dst--;
 			} while (nbytes >= func_bytes);
 			} while (nbytes >= func_bytes);
 		}
 		}
-	}
-
 done:
 done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
-
-	return nbytes;
-}
-
-int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-			    struct blkcipher_desc *desc,
-			    struct scatterlist *dst,
-			    struct scatterlist *src, unsigned int nbytes)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		u128_xor(dst, dst, (u128 *)walk.iv);
+		*(u128 *)walk.iv = last_iv;
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	glue_fpu_end(fpu_enabled);
 	glue_fpu_end(fpu_enabled);
 	return err;
 	return err;
 }
 }
-EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
 
 
-static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
-					struct blkcipher_desc *desc,
-					struct blkcipher_walk *walk)
+int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req)
 {
 {
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 *src = (u8 *)walk->src.virt.addr;
-	u8 *dst = (u8 *)walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-	le128 ctrblk;
-	u128 tmp;
+	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	const unsigned int bsize = 128 / 8;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
 
 
-	be128_to_le128(&ctrblk, (be128 *)walk->iv);
+	err = skcipher_walk_virt(&walk, req, false);
 
 
-	memcpy(&tmp, src, nbytes);
-	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
-	memcpy(dst, &tmp, nbytes);
+	while ((nbytes = walk.nbytes) >= bsize) {
+		const u128 *src = walk.src.virt.addr;
+		u128 *dst = walk.dst.virt.addr;
+		unsigned int func_bytes, num_blocks;
+		unsigned int i;
+		le128 ctrblk;
 
 
-	le128_to_be128((be128 *)walk->iv, &ctrblk);
-}
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     &walk, fpu_enabled, nbytes);
 
 
-static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-					    struct blkcipher_desc *desc,
-					    struct blkcipher_walk *walk)
-{
-	const unsigned int bsize = 128 / 8;
-	void *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	le128 ctrblk;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
+		be128_to_le128(&ctrblk, (be128 *)walk.iv);
 
 
-	be128_to_le128(&ctrblk, (be128 *)walk->iv);
+		for (i = 0; i < gctx->num_funcs; i++) {
+			num_blocks = gctx->funcs[i].num_blocks;
+			func_bytes = bsize * num_blocks;
 
 
-	/* Process multi-block batch */
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
+			if (nbytes < func_bytes)
+				continue;
 
 
-		if (nbytes >= func_bytes) {
+			/* Process multi-block batch */
 			do {
 			do {
 				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
 				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
-
 				src += num_blocks;
 				src += num_blocks;
 				dst += num_blocks;
 				dst += num_blocks;
 				nbytes -= func_bytes;
 				nbytes -= func_bytes;
 			} while (nbytes >= func_bytes);
 			} while (nbytes >= func_bytes);
 
 
 			if (nbytes < bsize)
 			if (nbytes < bsize)
-				goto done;
+				break;
 		}
 		}
-	}
-
-done:
-	le128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
-}
-
-int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, bsize);
 
 
-	while ((nbytes = walk.nbytes) >= bsize) {
-		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
-		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		le128_to_be128((be128 *)walk.iv, &ctrblk);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	}
 
 
 	glue_fpu_end(fpu_enabled);
 	glue_fpu_end(fpu_enabled);
 
 
-	if (walk.nbytes) {
-		glue_ctr_crypt_final_128bit(
-			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
-
-static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-					    void *ctx,
-					    struct blkcipher_desc *desc,
-					    struct blkcipher_walk *walk)
-{
-	const unsigned int bsize = 128 / 8;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	unsigned int num_blocks, func_bytes;
-	unsigned int i;
-
-	/* Process multi-block batch */
-	for (i = 0; i < gctx->num_funcs; i++) {
-		num_blocks = gctx->funcs[i].num_blocks;
-		func_bytes = bsize * num_blocks;
-
-		if (nbytes >= func_bytes) {
-			do {
-				gctx->funcs[i].fn_u.xts(ctx, dst, src,
-							(le128 *)walk->iv);
+	if (nbytes) {
+		le128 ctrblk;
+		u128 tmp;
 
 
-				src += num_blocks;
-				dst += num_blocks;
-				nbytes -= func_bytes;
-			} while (nbytes >= func_bytes);
+		be128_to_le128(&ctrblk, (be128 *)walk.iv);
+		memcpy(&tmp, walk.src.virt.addr, nbytes);
+		gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
+							  &ctrblk);
+		memcpy(walk.dst.virt.addr, &tmp, nbytes);
+		le128_to_be128((be128 *)walk.iv, &ctrblk);
 
 
-			if (nbytes < bsize)
-				goto done;
-		}
+		err = skcipher_walk_done(&walk, 0);
 	}
 	}
 
 
-done:
-	return nbytes;
+	return err;
 }
 }
+EXPORT_SYMBOL_GPL(glue_ctr_req_128bit);
 
 
 static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 					  void *ctx,
 					  void *ctx,
@@ -372,46 +271,6 @@ done:
 	return nbytes;
 	return nbytes;
 }
 }
 
 
-/* for implementations implementing faster XTS IV generator */
-int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-			  struct blkcipher_desc *desc, struct scatterlist *dst,
-			  struct scatterlist *src, unsigned int nbytes,
-			  void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
-			  void *tweak_ctx, void *crypt_ctx)
-{
-	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	/* set minimum length to bsize, for tweak_fn */
-	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-				     desc, fpu_enabled,
-				     nbytes < bsize ? bsize : nbytes);
-
-	/* calculate first value of T */
-	tweak_fn(tweak_ctx, walk.iv, walk.iv);
-
-	while (nbytes) {
-		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-	}
-
-	glue_fpu_end(fpu_enabled);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
-
 int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			struct skcipher_request *req,
 			struct skcipher_request *req,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
@@ -429,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 		return err;
 		return err;
 
 
 	/* set minimum length to bsize, for tweak_fn */
 	/* set minimum length to bsize, for tweak_fn */
-	fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					    &walk, fpu_enabled,
-					    nbytes < bsize ? bsize : nbytes);
+	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+				     &walk, fpu_enabled,
+				     nbytes < bsize ? bsize : nbytes);
 
 
 	/* calculate first value of T */
 	/* calculate first value of T */
 	tweak_fn(tweak_ctx, walk.iv, walk.iv);
 	tweak_fn(tweak_ctx, walk.iv, walk.iv);
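The common_glue_ctx tables these helpers walk are defined by the individual cipher glue files (see the serpent tables below). A hedged sketch for an imaginary cipher follows; the struct and field names mirror how they are used above, but the mycipher_*() routines are made up:

#include <crypto/internal/skcipher.h>
#include <asm/crypto/glue_helper.h>

/* Hypothetical 8-way dispatch table for glue_ecb_req_128bit():
 * entries run from the widest batch down to a single block, and the
 * last entry must be able to handle one block at a time. */
asmlinkage void mycipher_ecb_enc_8way(void *ctx, u8 *dst, const u8 *src);
asmlinkage void mycipher_encrypt(void *ctx, u8 *dst, const u8 *src);

static const struct common_glue_ctx mycipher_enc = {
	.num_funcs = 2,
	.fpu_blocks_limit = 8,	/* only touch the FPU for >= 8 blocks */

	.funcs = { {
		.num_blocks = 8,
		.fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_ecb_enc_8way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(mycipher_encrypt) }
	} }
};

static int mycipher_ecb_encrypt(struct skcipher_request *req)
{
	return glue_ecb_req_128bit(&mycipher_enc, req);
}

fpu_blocks_limit is what glue_fpu_begin() compares against before enabling the FPU, so short requests fall through to the scalar single-block entry without an FPU save/restore.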

arch/x86/crypto/serpent_avx2_glue.c  (+97, -381)

@@ -14,15 +14,12 @@
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
 #include <crypto/serpent.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
+#include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 #include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
 
 
 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
 #define SERPENT_AVX2_PARALLEL_BLOCKS 16
 
 
@@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
 asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
 asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
 				      const u8 *src, le128 *iv);
 				      const u8 *src, le128 *iv);
 
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
 static const struct common_glue_ctx serpent_enc = {
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 3,
 	.num_funcs = 3,
 	.fpu_blocks_limit = 8,
 	.fpu_blocks_limit = 8,
@@ -136,403 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
 	} }
 	} }
 };
 };
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 }
 
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 }
 
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				       dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 }
 
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
 {
-	/* since reusing AVX functions, starts using FPU at 8 parallel blocks */
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
-}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
+	return glue_xts_req_128bit(&serpent_enc_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&serpent_dec_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
-	}
-
-	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
-		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
-		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
-		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg srp_algs[10] = { {
-	.cra_name		= "__ecb-serpent-avx2",
-	.cra_driver_name	= "__driver-ecb-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[0].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-avx2",
-	.cra_driver_name	= "__driver-cbc-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[1].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-avx2",
-	.cra_driver_name	= "__driver-ctr-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[2].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-avx2",
-	.cra_driver_name	= "__driver-lrw-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[3].cra_list),
-	.cra_exit		= lrw_serpent_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-avx2",
-	.cra_driver_name	= "__driver-xts-serpent-avx2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[4].cra_list),
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[5].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[6].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[7].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[8].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-avx2",
-	.cra_priority		= 600,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(srp_algs[9].cra_list),
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(serpent)",
+		.base.cra_driver_name	= "__xts-serpent-avx2",
+		.base.cra_priority	= 600,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= xts_serpent_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
 	},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
 
 
 static int __init init(void)
 static int __init init(void)
 {
 {
@@ -548,12 +261,15 @@ static int __init init(void)
 		return -ENODEV;
 		return -ENODEV;
 	}
 	}
 
 
-	return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }
 }
 
 
 static void __exit fini(void)
 static void __exit fini(void)
 {
 {
-	crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }
 }
 
 
 module_init(init);
 module_init(init);
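The serpent and cast6 conversions all finish with the same registration boilerplate: the CRYPTO_ALG_INTERNAL "__" algorithms are registered together with simd wrappers that expose the async "ecb(cipher)"-style names and defer to cryptd when the FPU cannot be used. A condensed, hypothetical template (mycipher_algs stands for an internal skcipher_alg array like the ones above; the CPU-feature checks the real drivers perform in init are omitted):

#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>

/* Hypothetical template only: mycipher_algs would be filled in with
 * CRYPTO_ALG_INTERNAL skcipher_alg entries like those above. */
static struct skcipher_alg mycipher_algs[4];
static struct simd_skcipher_alg *mycipher_simd_algs[ARRAY_SIZE(mycipher_algs)];

static int __init mycipher_mod_init(void)
{
	/* Real drivers first check the required CPU features here. */
	return simd_register_skciphers_compat(mycipher_algs,
					      ARRAY_SIZE(mycipher_algs),
					      mycipher_simd_algs);
}

static void __exit mycipher_mod_exit(void)
{
	simd_unregister_skciphers(mycipher_algs, ARRAY_SIZE(mycipher_algs),
				  mycipher_simd_algs);
}

module_init(mycipher_mod_init);
module_exit(mycipher_mod_exit);
MODULE_LICENSE("GPL");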

arch/x86/crypto/serpent_avx_glue.c  (+115, -403)

@@ -24,21 +24,15 @@
  */
  */
 
 
 #include <linux/module.h>
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <crypto/serpent.h>
 #include <crypto/serpent.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
 #include <asm/crypto/glue_helper.h>
 #include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
 
 
 /* 8-way parallel cipher functions */
 /* 8-way parallel cipher functions */
 asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
 asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
@@ -91,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 }
 }
 EXPORT_SYMBOL_GPL(serpent_xts_dec);
 EXPORT_SYMBOL_GPL(serpent_xts_dec);
 
 
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
+int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
+		       unsigned int keylen)
+{
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
+}
+EXPORT_SYMBOL_GPL(xts_serpent_setkey);
 
 
 static const struct common_glue_ctx serpent_enc = {
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 2,
 	.num_funcs = 2,
@@ -170,423 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
 	} }
 	} }
 };
 };
 
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 }
 
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 }
 
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				     dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 }
 
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
-}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
-	glue_fpu_end(fpu_enabled);
+	return glue_xts_req_128bit(&serpent_enc_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
-							SERPENT_BLOCK_SIZE);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen -
-						SERPENT_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_xts_req_128bit(&serpent_dec_xts, req,
+				   XTS_TWEAK_CAST(__serpent_encrypt),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-void lrw_serpent_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm);
-
-int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-EXPORT_SYMBOL_GPL(xts_serpent_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(__serpent_encrypt),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg serpent_algs[10] = { {
-	.cra_name		= "__ecb-serpent-avx",
-	.cra_driver_name	= "__driver-ecb-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-avx",
-	.cra_driver_name	= "__driver-cbc-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-avx",
-	.cra_driver_name	= "__driver-ctr-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-avx",
-	.cra_driver_name	= "__driver-lrw-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_serpent_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-avx",
-	.cra_driver_name	= "__driver-xts-serpent-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-avx",
-	.cra_priority		= 500,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(serpent)",
+		.base.cra_driver_name	= "__xts-serpent-avx",
+		.base.cra_priority	= 500,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= xts_serpent_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];

 static int __init serpent_init(void)
 {
@@ -598,12 +307,15 @@ static int __init serpent_init(void)
 		return -ENODEV;
 	}

-	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }

 static void __exit serpent_exit(void)
 {
-	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }

 module_init(serpent_init);

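For reference, the converted handlers above no longer take a (desc, dst, src, nbytes) tuple; they receive only the skcipher_request and recover their per-transform context from it, as the new xts_encrypt()/xts_decrypt() do. A stripped-down sketch of that shape follows; example_ctx and example_encrypt are hypothetical and the body is a placeholder, not code from the patch.

#include <linux/errno.h>
#include <linux/types.h>
#include <crypto/internal/skcipher.h>

struct example_ctx {
	u32 dummy;	/* the real contexts hold expanded cipher keys */
};

static int example_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct example_ctx *ctx = crypto_skcipher_ctx(tfm);

	/* a real implementation hands req and ctx to a glue helper here */
	(void)ctx;
	return -ENOSYS;
}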
+ 69 - 450
arch/x86/crypto/serpent_sse2_glue.c

@@ -30,21 +30,22 @@
  */

 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
-#include <crypto/serpent.h>
-#include <crypto/cryptd.h>
 #include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
+#include <crypto/serpent.h>
 #include <asm/crypto/serpent-sse2.h>
 #include <asm/crypto/glue_helper.h>

+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
 static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
 {
 	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
@@ -139,464 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 	} }
 };

-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
-				     dst, src, nbytes);
+	return glue_ecb_req_128bit(&serpent_enc, req);
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&serpent_dec, req);
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+					   req);
 }
 }
 
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
-			      NULL, fpu_enabled, nbytes);
+	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
 }
 }
 
 
-static inline void serpent_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&serpent_ctr, req);
 }
 }
 
 
-struct crypt_priv {
-	struct serpent_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = SERPENT_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
-		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct serpent_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct serpent_ctx serpent_ctx;
-};
-
-static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
-							SERPENT_BLOCK_SIZE);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen -
-						SERPENT_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->serpent_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-
-struct serpent_xts_ctx {
-	struct serpent_ctx tweak_ctx;
-	struct serpent_ctx crypt_ctx;
+static struct skcipher_alg serpent_algs[] = {
+	{
+		.base.cra_name		= "__ecb(serpent)",
+		.base.cra_driver_name	= "__ecb-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(serpent)",
+		.base.cra_driver_name	= "__cbc-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(serpent)",
+		.base.cra_driver_name	= "__ctr-serpent-sse2",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= SERPENT_MIN_KEY_SIZE,
+		.max_keysize		= SERPENT_MAX_KEY_SIZE,
+		.ivsize			= SERPENT_BLOCK_SIZE,
+		.chunksize		= SERPENT_BLOCK_SIZE,
+		.setkey			= serpent_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	},
 };

-static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen)
-{
-	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static struct crypto_alg serpent_algs[10] = { {
-	.cra_name		= "__ecb-serpent-sse2",
-	.cra_driver_name	= "__driver-ecb-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-serpent-sse2",
-	.cra_driver_name	= "__driver-cbc-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-serpent-sse2",
-	.cra_driver_name	= "__driver-ctr-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct serpent_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= serpent_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-serpent-sse2",
-	.cra_driver_name	= "__driver-lrw-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= lrw_serpent_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-serpent-sse2",
-	.cra_driver_name	= "__driver-xts-serpent-sse2",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= xts_serpent_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(serpent)",
-	.cra_driver_name	= "ecb-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(serpent)",
-	.cra_driver_name	= "cbc-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(serpent)",
-	.cra_driver_name	= "ctr-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(serpent)",
-	.cra_driver_name	= "lrw-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE +
-					  SERPENT_BLOCK_SIZE,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(serpent)",
-	.cra_driver_name	= "xts-serpent-sse2",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= SERPENT_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
-			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
-			.ivsize		= SERPENT_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-} };
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];

 static int __init serpent_sse2_init(void)
 {
@@ -605,12 +221,15 @@ static int __init serpent_sse2_init(void)
 		return -ENODEV;
 	}

-	return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	return simd_register_skciphers_compat(serpent_algs,
+					      ARRAY_SIZE(serpent_algs),
+					      serpent_simd_algs);
 }

 static void __exit serpent_sse2_exit(void)
 {
-	crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+				  serpent_simd_algs);
 }

 module_init(serpent_sse2_init);

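Note that the SSE2 glue no longer registers lrw(serpent) or xts(serpent) at all; those names are presumably meant to be served by the generic lrw/xts templates stacked on the accelerated ECB implementation. Nothing changes for a kernel user requesting the transform; a minimal allocation sketch follows (only the "xts(serpent)" name string follows the crypto API naming convention, the function itself is hypothetical).

#include <linux/err.h>
#include <crypto/skcipher.h>

static struct crypto_skcipher *example_get_xts_serpent(void)
{
	struct crypto_skcipher *tfm;

	/* the xts template composes with whichever serpent ECB driver wins */
	tfm = crypto_alloc_skcipher("xts(serpent)", 0, 0);
	if (IS_ERR(tfm))
		return NULL;	/* e.g. serpent support not built in */

	return tfm;
}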
+ 3 - 25
arch/x86/crypto/sha1-mb/sha1_mb.c

@@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
 						(struct sha1_mb_mgr *state);
 						(struct sha1_mb_mgr *state);
 
 
-static inline void sha1_init_digest(uint32_t *digest)
-{
-	static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
-					SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 			 uint64_t total_len)
 {
 {
@@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
 					  uint32_t len,
 					  uint32_t len,
 					  int flags)
 					  int flags)
 {
 {
-	if (flags & (~HASH_ENTIRE)) {
-		/*
-		 * User should not pass anything other than FIRST, UPDATE, or
-		 * LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		return ctx;
 		return ctx;
 	}
 	}
@@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
 		return ctx;
 		return ctx;
 	}
 	}
 
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		return ctx;
 		return ctx;
 	}
 	}
 
 
-
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha1_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/*
 	/*
 	 * If we made it here, there were no errors during this call to
 	 * If we made it here, there were no errors during this call to
 	 * submit
 	 * submit

+ 3 - 5
arch/x86/crypto/sha1-mb/sha1_mb_ctx.h

@@ -57,11 +57,9 @@
 #include "sha1_mb_mgr.h"
 #include "sha1_mb_mgr.h"
 
 
 #define HASH_UPDATE          0x00
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE	     0x04
-#define HASH_FINAL	     0x08
+#define HASH_LAST            0x01
+#define HASH_DONE	     0x02
+#define HASH_FINAL	     0x04

 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01

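The multi-buffer hash changes (repeated for sha256-mb and sha512-mb below) are all of a piece: the HASH_FIRST branch in the submit path is dead code, since the digest is seeded when the hash context is initialised rather than at first submit, so the flag, the HASH_ENTIRE mask and the first-submit initialisation go away and the remaining flag values are renumbered. What the flag validation reduces to, as a standalone sketch using the new values:

/* surviving submit flags after the change (values match the new headers) */
#define HASH_UPDATE	0x00
#define HASH_LAST	0x01

/* sketch: any other bit in the submit flags is now an invalid request */
static int example_submit_flags_ok(int flags)
{
	return !(flags & ~(HASH_UPDATE | HASH_LAST));
}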
+ 3 - 24
arch/x86/crypto/sha256-mb/sha256_mb.c

@@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
 static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
 static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
 			(struct sha256_mb_mgr *state);
 			(struct sha256_mb_mgr *state);
 
 
-inline void sha256_init_digest(uint32_t *digest)
-{
-	static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = {
-				SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
-				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7};
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
 inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 			 uint64_t total_len)
 {
 {
@@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
 					  uint32_t len,
 					  uint32_t len,
 					  int flags)
 					  int flags)
 {
 {
-	if (flags & (~HASH_ENTIRE)) {
-		/* User should not pass anything other than FIRST, UPDATE
-		 * or LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		return ctx;
 		return ctx;
 	}
 	}
@@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
 		return ctx;
 		return ctx;
 	}
 	}
 
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		return ctx;
 		return ctx;
 	}
 	}
 
 
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha256_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/* If we made it here, there was no error during this call to submit */
 	/* If we made it here, there was no error during this call to submit */
 	ctx->error = HASH_CTX_ERROR_NONE;
 	ctx->error = HASH_CTX_ERROR_NONE;
 
 

+ 3 - 5
arch/x86/crypto/sha256-mb/sha256_mb_ctx.h

@@ -57,11 +57,9 @@
 #include "sha256_mb_mgr.h"
 #include "sha256_mb_mgr.h"
 
 
 #define HASH_UPDATE          0x00
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE	     0x04
-#define HASH_FINAL	     0x08
+#define HASH_LAST            0x01
+#define HASH_DONE	     0x02
+#define HASH_FINAL	     0x04

 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01

+ 3 - 27
arch/x86/crypto/sha512-mb/sha512_mb.c

@@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
 static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
 static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
 						(struct sha512_mb_mgr *state);
 						(struct sha512_mb_mgr *state);
 
 
-inline void sha512_init_digest(uint64_t *digest)
-{
-	static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = {
-					SHA512_H0, SHA512_H1, SHA512_H2,
-					SHA512_H3, SHA512_H4, SHA512_H5,
-					SHA512_H6, SHA512_H7 };
-	memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
 inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
 inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
 			 uint64_t total_len)
 			 uint64_t total_len)
 {
 {
@@ -263,11 +254,8 @@ static struct sha512_hash_ctx
 
 
 	mgr = cstate->mgr;
 	mgr = cstate->mgr;
 	spin_lock_irqsave(&cstate->work_lock, irqflags);
 	spin_lock_irqsave(&cstate->work_lock, irqflags);
-	if (flags & (~HASH_ENTIRE)) {
-		/*
-		 * User should not pass anything other than FIRST, UPDATE, or
-		 * LAST
-		 */
+	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+		/* User should not pass anything other than UPDATE or LAST */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
 		goto unlock;
 		goto unlock;
 	}
 	}
@@ -278,24 +266,12 @@ static struct sha512_hash_ctx
 		goto unlock;
 		goto unlock;
 	}
 	}
 
 
-	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+	if (ctx->status & HASH_CTX_STS_COMPLETE) {
 		/* Cannot update a finished job. */
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
 		goto unlock;
 		goto unlock;
 	}
 	}
 
 
-
-	if (flags & HASH_FIRST) {
-		/* Init digest */
-		sha512_init_digest(ctx->job.result_digest);
-
-		/* Reset byte counter */
-		ctx->total_length = 0;
-
-		/* Clear extra blocks */
-		ctx->partial_block_buffer_length = 0;
-	}
-
 	/*
 	/*
 	 * If we made it here, there were no errors during this call to
 	 * If we made it here, there were no errors during this call to
 	 * submit
 	 * submit

+ 3 - 5
arch/x86/crypto/sha512-mb/sha512_mb_ctx.h

@@ -57,11 +57,9 @@
 #include "sha512_mb_mgr.h"
 #include "sha512_mb_mgr.h"
 
 
 #define HASH_UPDATE          0x00
 #define HASH_UPDATE          0x00
-#define HASH_FIRST           0x01
-#define HASH_LAST            0x02
-#define HASH_ENTIRE          0x03
-#define HASH_DONE            0x04
-#define HASH_FINAL           0x08
+#define HASH_LAST            0x01
+#define HASH_DONE            0x02
+#define HASH_FINAL           0x04

 #define HASH_CTX_STS_IDLE       0x00
 #define HASH_CTX_STS_PROCESSING 0x01

+ 121 - 372
arch/x86/crypto/twofish_avx_glue.c

@@ -24,24 +24,15 @@
  */

 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
 #include <crypto/twofish.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/twofish.h>
 #include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
+#include <asm/crypto/twofish.h>

 #define TWOFISH_PARALLEL_BLOCKS 8

@@ -61,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
 asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 				     const u8 *src, le128 *iv);

+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return twofish_setkey(&tfm->base, key, keylen);
+}
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
 {
@@ -79,6 +76,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 				  GLUE_FUNC_CAST(twofish_dec_blk));
 }

+struct twofish_xts_ctx {
+	struct twofish_ctx tweak_ctx;
+	struct twofish_ctx crypt_ctx;
+};
+
+static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	u32 *flags = &tfm->base.crt_flags;
+	int err;
+
+	err = xts_verify_key(tfm, key, keylen);
+	if (err)
+		return err;
+
+	/* first half of xts-key is for crypt */
+	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+				flags);
+				flags);

 static const struct common_glue_ctx twofish_enc = {
 	.num_funcs = 3,
@@ -170,389 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = {
 	} }
 };

-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
-				       dst, src, nbytes);
+	return glue_ecb_req_128bit(&twofish_enc, req);
 }
 }
 
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
-				       nbytes);
+	return glue_ecb_req_128bit(&twofish_dec, req);
 }
 }
 
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
 {
-	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+					   req);
 }
 }
 
 
-static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
 {
-	return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
-			      fpu_enabled, nbytes);
+	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 }
 }
 
 
-static inline void twofish_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
 {
 {
-	glue_fpu_end(fpu_enabled);
+	return glue_ctr_req_128bit(&twofish_ctr, req);
 }
 }
 
 
-struct crypt_priv {
-	struct twofish_ctx *ctx;
-	bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
-		twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
-		twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	nbytes %= bsize * 3;
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_enc_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&twofish_enc_xts, req,
+				   XTS_TWEAK_CAST(twofish_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct crypt_priv *ctx = priv;
-	int i;
-
-	ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
-	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
-		twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
-		return;
-	}
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
-		twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
-
-	nbytes %= bsize * 3;
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_dec_blk(ctx->ctx, srcdst, srcdst);
+	return glue_xts_req_128bit(&twofish_dec_xts, req,
+				   XTS_TWEAK_CAST(twofish_enc_blk),
+				   &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 }
 
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[TWOFISH_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->twofish_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[TWOFISH_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->twofish_ctx,
-		.fpu_enabled = false,
-	};
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(twofish_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(twofish_enc_blk),
-				     &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg twofish_algs[10] = { {
-	.cra_name		= "__ecb-twofish-avx",
-	.cra_driver_name	= "__driver-ecb-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-twofish-avx",
-	.cra_driver_name	= "__driver-cbc-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__ctr-twofish-avx",
-	.cra_driver_name	= "__driver-ctr-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "__lrw-twofish-avx",
-	.cra_driver_name	= "__driver-lrw-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_twofish_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-twofish-avx",
-	.cra_driver_name	= "__driver-xts-twofish-avx",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= xts_twofish_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= __ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
-		},
-	},
-}, {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE +
-					  TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(twofish)",
-	.cra_driver_name	= "xts-twofish-avx",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+static struct skcipher_alg twofish_algs[] = {
+	{
+		.base.cra_name		= "__ecb(twofish)",
+		.base.cra_driver_name	= "__ecb-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "__cbc(twofish)",
+		.base.cra_driver_name	= "__cbc-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(twofish)",
+		.base.cra_driver_name	= "__ctr-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.chunksize		= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
+	}, {
+		.base.cra_name		= "__xts(twofish)",
+		.base.cra_driver_name	= "__xts-twofish-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_xts_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= 2 * TF_MIN_KEY_SIZE,
+		.max_keysize		= 2 * TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= xts_twofish_setkey,
+		.encrypt		= xts_encrypt,
+		.decrypt		= xts_decrypt,
 	},
-} };
+};
+
+static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
 
 static int __init twofish_init(void)
 {
@@ -563,12 +309,15 @@ static int __init twofish_init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+	return simd_register_skciphers_compat(twofish_algs,
+					      ARRAY_SIZE(twofish_algs),
+					      twofish_simd_algs);
 }
 
 static void __exit twofish_exit(void)
 {
-	crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+	simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs),
+				  twofish_simd_algs);
 }
 
 module_init(twofish_init);

+ 67 - 272
arch/x86/crypto/twofish_glue_3way.c

@@ -20,22 +20,26 @@
  *
  */
 
-#include <asm/processor.h>
+#include <asm/crypto/glue_helper.h>
+#include <asm/crypto/twofish.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/twofish.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/twofish.h>
-#include <crypto/b128ops.h>
-#include <asm/crypto/twofish.h>
-#include <asm/crypto/glue_helper.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
 
 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
 
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+				   const u8 *key, unsigned int keylen)
+{
+	return twofish_setkey(&tfm->base, key, keylen);
+}
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
 					const u8 *src)
 {
@@ -151,284 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 	} }
 };
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
-				       dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
-				       nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct twofish_ctx *ctx = priv;
-	int i;
-
-	if (nbytes == 3 * bsize) {
-		twofish_enc_blk_3way(ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
-	const unsigned int bsize = TF_BLOCK_SIZE;
-	struct twofish_ctx *ctx = priv;
-	int i;
-
-	if (nbytes == 3 * bsize) {
-		twofish_dec_blk_3way(ctx, srcdst, srcdst);
-		return;
-	}
-
-	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
-		twofish_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
-			       &tfm->crt_flags);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
+	return glue_ecb_req_128bit(&twofish_enc, req);
 }
-EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
 
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[3];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->twofish_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_ecb_req_128bit(&twofish_dec, req);
 }
 
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[3];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = &ctx->twofish_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return lrw_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+					   req);
 }
 
-void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
-
-int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-		       unsigned int keylen)
-{
-	struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
-
-	/* first half of xts-key is for crypt */
-	err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
-	if (err)
-		return err;
-
-	/* second half of xts-key is for tweak */
-	return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
-				flags);
-}
-EXPORT_SYMBOL_GPL(xts_twofish_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[3];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	le128 buf[3];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-		.crypt_ctx = &ctx->crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-
-	return xts_crypt(desc, dst, src, nbytes, &req);
+	return glue_ctr_req_128bit(&twofish_ctr, req);
 }
 
-static struct crypto_alg tf_algs[5] = { {
-	.cra_name		= "ecb(twofish)",
-	.cra_driver_name	= "ecb-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "cbc(twofish)",
-	.cra_driver_name	= "cbc-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ctr(twofish)",
-	.cra_driver_name	= "ctr-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct twofish_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= twofish_setkey,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(twofish)",
-	.cra_driver_name	= "lrw-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_twofish_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
-			.max_keysize	= TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= lrw_twofish_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(twofish)",
-	.cra_driver_name	= "xts-twofish-3way",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		= TF_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct twofish_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-			.max_keysize	= TF_MAX_KEY_SIZE * 2,
-			.ivsize		= TF_BLOCK_SIZE,
-			.setkey		= xts_twofish_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
+static struct skcipher_alg tf_skciphers[] = {
+	{
+		.base.cra_name		= "ecb(twofish)",
+		.base.cra_driver_name	= "ecb-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ecb_encrypt,
+		.decrypt		= ecb_decrypt,
+	}, {
+		.base.cra_name		= "cbc(twofish)",
+		.base.cra_driver_name	= "cbc-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= TF_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= cbc_encrypt,
+		.decrypt		= cbc_decrypt,
+	}, {
+		.base.cra_name		= "ctr(twofish)",
+		.base.cra_driver_name	= "ctr-twofish-3way",
+		.base.cra_priority	= 300,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= TF_MIN_KEY_SIZE,
+		.max_keysize		= TF_MAX_KEY_SIZE,
+		.ivsize			= TF_BLOCK_SIZE,
+		.chunksize		= TF_BLOCK_SIZE,
+		.setkey			= twofish_setkey_skcipher,
+		.encrypt		= ctr_crypt,
+		.decrypt		= ctr_crypt,
 	},
-} };
+};
 
 static bool is_blacklisted_cpu(void)
 {
@@ -478,12 +272,13 @@ static int __init init(void)
 		return -ENODEV;
 	}
 
-	return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
+	return crypto_register_skciphers(tf_skciphers,
+					 ARRAY_SIZE(tf_skciphers));
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
+	crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
 }
 
 module_init(init);

+ 5 - 11
arch/x86/include/asm/crypto/camellia.h

@@ -2,8 +2,9 @@
 #ifndef ASM_X86_CAMELLIA_H
 #define ASM_X86_CAMELLIA_H
 
-#include <linux/kernel.h>
+#include <crypto/b128ops.h>
 #include <linux/crypto.h>
+#include <linux/kernel.h>
 
 #define CAMELLIA_MIN_KEY_SIZE	16
 #define CAMELLIA_MAX_KEY_SIZE	32
@@ -11,16 +12,13 @@
 #define CAMELLIA_TABLE_BYTE_LEN	272
 #define CAMELLIA_PARALLEL_BLOCKS 2
 
+struct crypto_skcipher;
+
 struct camellia_ctx {
 	u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
 	u32 key_length;
 };
 
-struct camellia_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct camellia_ctx camellia_ctx;
-};
-
 struct camellia_xts_ctx {
 	struct camellia_ctx tweak_ctx;
 	struct camellia_ctx crypt_ctx;
@@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx,
 			     const unsigned char *key,
 			     unsigned int key_len, u32 *flags);
 
-extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
-			       unsigned int keylen);
-extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			       unsigned int keylen);
 
 /* regular block cipher functions */

+ 12 - 63
arch/x86/include/asm/crypto/glue_helper.h

@@ -45,7 +45,7 @@ struct common_glue_ctx {
 };
 
 static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
-				  struct blkcipher_desc *desc,
+				  struct skcipher_walk *walk,
 				  bool fpu_enabled, unsigned int nbytes)
 {
 	if (likely(fpu_blocks_limit < 0))
@@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
 	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
 		return false;
 
-	if (desc) {
-		/* prevent sleeping if FPU is in use */
-		desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	}
-
-	kernel_fpu_begin();
-	return true;
-}
-
-static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
-					 int fpu_blocks_limit,
-					 struct skcipher_walk *walk,
-					 bool fpu_enabled, unsigned int nbytes)
-{
-	if (likely(fpu_blocks_limit < 0))
-		return false;
-
-	if (fpu_enabled)
-		return true;
-
-	/*
-	 * Vector-registers are only used when chunk to be processed is large
-	 * enough, so do not enable FPU until it is necessary.
-	 */
-	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
-		return false;
-
 	/* prevent sleeping if FPU is in use */
 	skcipher_walk_atomise(walk);
 
@@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i)
 	i->b = cpu_to_le64(b);
 }
 
-extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
-				   struct blkcipher_desc *desc,
-				   struct scatterlist *dst,
-				   struct scatterlist *src,
-				   unsigned int nbytes);
-
-extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
-				   struct blkcipher_desc *desc,
-				   struct scatterlist *dst,
-				   struct scatterlist *src,
-				   unsigned int nbytes);
-
-extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes,
-				 common_glue_func_t tweak_fn, void *tweak_ctx,
-				 void *crypt_ctx);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
-				 struct blkcipher_desc *desc,
-				 struct scatterlist *dst,
-				 struct scatterlist *src, unsigned int nbytes,
-				 common_glue_func_t tweak_fn, void *tweak_ctx,
-				 void *crypt_ctx);
+extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req);
+
+extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+				       struct skcipher_request *req);
+
+extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+				       struct skcipher_request *req);
+
+extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req);
 
 extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			       struct skcipher_request *req,

+ 5 - 12
arch/x86/include/asm/crypto/serpent-avx.h

@@ -2,15 +2,13 @@
 #ifndef ASM_X86_SERPENT_AVX_H
 #define ASM_X86_SERPENT_AVX_H
 
-#include <linux/crypto.h>
+#include <crypto/b128ops.h>
 #include <crypto/serpent.h>
+#include <linux/types.h>
 
-#define SERPENT_PARALLEL_BLOCKS 8
+struct crypto_skcipher;
 
-struct serpent_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct serpent_ctx serpent_ctx;
-};
+#define SERPENT_PARALLEL_BLOCKS 8
 
 struct serpent_xts_ctx {
 	struct serpent_ctx tweak_ctx;
@@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
 extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
 extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
 
-extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
-extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int keylen);
 
 #endif

+ 0 - 19
arch/x86/include/asm/crypto/twofish.h

@@ -4,19 +4,8 @@
 
 #include <linux/crypto.h>
 #include <crypto/twofish.h>
-#include <crypto/lrw.h>
 #include <crypto/b128ops.h>
 
-struct twofish_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	struct twofish_ctx twofish_ctx;
-};
-
-struct twofish_xts_ctx {
-	struct twofish_ctx tweak_ctx;
-	struct twofish_ctx crypt_ctx;
-};
-
 /* regular block cipher functions from twofish_x86_64 module */
 asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
 				const u8 *src);
@@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
 extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
 				     le128 *iv);
 
-extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
-extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
-			      unsigned int keylen);
-
 #endif /* ASM_X86_TWOFISH_H */

+ 69 - 60
crypto/Kconfig

@@ -245,10 +245,6 @@ config CRYPTO_TEST
 	help
 	  Quick & dirty crypto test module.
 
-config CRYPTO_ABLK_HELPER
-	tristate
-	select CRYPTO_CRYPTD
-
 config CRYPTO_SIMD
 	tristate
 	select CRYPTO_CRYPTD
@@ -324,6 +320,14 @@ config CRYPTO_CBC
 	  CBC: Cipher Block Chaining mode
 	  This block cipher algorithm is required for IPSec.
 
+config CRYPTO_CFB
+	tristate "CFB support"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_MANAGER
+	help
+	  CFB: Cipher FeedBack mode
+	  This block cipher algorithm is required for TPM2 Cryptography.
+
 config CRYPTO_CTR
 	tristate "CTR support"
 	select CRYPTO_BLKCIPHER
@@ -1114,7 +1118,7 @@ config CRYPTO_BLOWFISH_COMMON
 config CRYPTO_BLOWFISH_X86_64
 	tristate "Blowfish cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_BLOWFISH_COMMON
 	help
 	  Blowfish cipher algorithm (x86_64), by Bruce Schneier.
@@ -1145,10 +1149,8 @@ config CRYPTO_CAMELLIA_X86_64
 	tristate "Camellia cipher algorithm (x86_64)"
 	tristate "Camellia cipher algorithm (x86_64)"
 	depends on X86 && 64BIT
 	depends on X86 && 64BIT
 	depends on CRYPTO
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	help
 	  Camellia cipher algorithm module (x86_64).
 	  Camellia cipher algorithm module (x86_64).
 
 
@@ -1164,12 +1166,10 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)"
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX)"
 	depends on X86 && 64BIT
 	depends on X86 && 64BIT
 	depends on CRYPTO
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAMELLIA_X86_64
 	select CRYPTO_CAMELLIA_X86_64
-	select CRYPTO_LRW
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	select CRYPTO_XTS
 	help
 	help
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX).
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX).
@@ -1186,14 +1186,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
 	depends on X86 && 64BIT
 	depends on X86 && 64BIT
 	depends on CRYPTO
 	depends on CRYPTO
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_CAMELLIA_X86_64
 	select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
 	select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	help
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX2).
 	  Camellia cipher algorithm module (x86_64/AES-NI/AVX2).
 
 
@@ -1238,11 +1231,10 @@ config CRYPTO_CAST5
 config CRYPTO_CAST5_AVX_X86_64
 	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_CAST_COMMON
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAST5
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_SIMD
 	help
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
@@ -1261,13 +1253,11 @@ config CRYPTO_CAST6
 config CRYPTO_CAST6_AVX_X86_64
 	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_CAST_COMMON
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CAST6
-	select CRYPTO_LRW
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
@@ -1294,7 +1284,7 @@ config CRYPTO_DES_SPARC64
 config CRYPTO_DES3_EDE_X86_64
 	tristate "Triple DES EDE cipher algorithm (x86-64)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_DES
 	help
 	  Triple DES EDE (FIPS 46-3) algorithm.
@@ -1422,13 +1412,10 @@ config CRYPTO_SERPENT
 config CRYPTO_SERPENT_SSE2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/SSE2)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1444,13 +1431,10 @@ config CRYPTO_SERPENT_SSE2_X86_64
 config CRYPTO_SERPENT_SSE2_586
 	tristate "Serpent cipher algorithm (i586/SSE2)"
 	depends on X86 && !64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1466,12 +1450,10 @@ config CRYPTO_SERPENT_SSE2_586
 config CRYPTO_SERPENT_AVX_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
 	select CRYPTO_SERPENT
-	select CRYPTO_LRW
+	select CRYPTO_SIMD
 	select CRYPTO_XTS
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
@@ -1488,14 +1470,7 @@ config CRYPTO_SERPENT_AVX_X86_64
 config CRYPTO_SERPENT_AVX2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX2)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
-	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_SERPENT
 	select CRYPTO_SERPENT_AVX_X86_64
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
 
@@ -1508,6 +1483,45 @@ config CRYPTO_SERPENT_AVX2_X86_64
 	  See also:
 	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
 
+config CRYPTO_SM4
+	tristate "SM4 cipher algorithm"
+	select CRYPTO_ALGAPI
+	help
+	  SM4 cipher algorithms (OSCCA GB/T 32907-2016).
+
+	  SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+	  Organization of State Commercial Administration of China (OSCCA)
+	  as an authorized cryptographic algorithms for the use within China.
+
+	  SMS4 was originally created for use in protecting wireless
+	  networks, and is mandated in the Chinese National Standard for
+	  Wireless LAN WAPI (Wired Authentication and Privacy Infrastructure)
+	  (GB.15629.11-2003).
+
+	  The latest SM4 standard (GBT.32907-2016) was proposed by OSCCA and
+	  standardized through TC 260 of the Standardization Administration
+	  of the People's Republic of China (SAC).
+
+	  The input, output, and key of SMS4 are each 128 bits.
+
+	  See also: <https://eprint.iacr.org/2008/329.pdf>
+
+	  If unsure, say N.
+
+config CRYPTO_SPECK
+	tristate "Speck cipher algorithm"
+	select CRYPTO_ALGAPI
+	help
+	  Speck is a lightweight block cipher that is tuned for optimal
+	  performance in software (rather than hardware).
+
+	  Speck may not be as secure as AES, and should only be used on systems
+	  where AES is not fast enough.
+
+	  See also: <https://eprint.iacr.org/2013/404.pdf>
+
+	  If unsure, say N.
+
 config CRYPTO_TEA
 	tristate "TEA, XTEA and XETA cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1581,12 +1595,10 @@ config CRYPTO_TWOFISH_X86_64
 config CRYPTO_TWOFISH_X86_64_3WAY
 	tristate "Twofish cipher algorithm (x86_64, 3-way parallel)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_GLUE_HELPER_X86
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Twofish cipher algorithm (x86_64, 3-way parallel).
 
@@ -1604,15 +1616,12 @@ config CRYPTO_TWOFISH_X86_64_3WAY
 config CRYPTO_TWOFISH_AVX_X86_64
 	tristate "Twofish cipher algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_SIMD
 	select CRYPTO_TWOFISH_COMMON
 	select CRYPTO_TWOFISH_X86_64
 	select CRYPTO_TWOFISH_X86_64_3WAY
-	select CRYPTO_LRW
-	select CRYPTO_XTS
 	help
 	  Twofish cipher algorithm (x86_64/AVX).
 

+ 3 - 1
crypto/Makefile

@@ -78,6 +78,7 @@ obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
 obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
 obj-$(CONFIG_CRYPTO_ECB) += ecb.o
 obj-$(CONFIG_CRYPTO_CBC) += cbc.o
+obj-$(CONFIG_CRYPTO_CFB) += cfb.o
 obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
 obj-$(CONFIG_CRYPTO_CTS) += cts.o
 obj-$(CONFIG_CRYPTO_LRW) += lrw.o
@@ -100,6 +101,7 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
 CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
+obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
@@ -110,6 +112,7 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
 obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
+obj-$(CONFIG_CRYPTO_SPECK) += speck.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
 obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
 obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
@@ -149,6 +152,5 @@ obj-$(CONFIG_XOR_BLOCKS) += xor.o
 obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
 obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
-obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
 crypto_simd-y := simd.o
 obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o

+ 0 - 150
crypto/ablk_helper.c

@@ -1,150 +0,0 @@
-/*
- * Shared async block cipher helpers
- *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * Based on aesni-intel_glue.c by:
- *  Copyright (C) 2008, Intel Corp.
- *    Author: Huang Ying <ying.huang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <crypto/algapi.h>
-#include <crypto/cryptd.h>
-#include <crypto/ablk_helper.h>
-#include <asm/simd.h>
-
-int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-		 unsigned int key_len)
-{
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-EXPORT_SYMBOL_GPL(ablk_set_key);
-
-int __ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct blkcipher_desc desc;
-
-	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-	desc.info = req->info;
-	desc.flags = 0;
-
-	return crypto_blkcipher_crt(desc.tfm)->encrypt(
-		&desc, req->dst, req->src, req->nbytes);
-}
-EXPORT_SYMBOL_GPL(__ablk_encrypt);
-
-int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!may_use_simd() ||
-	    (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		*cryptd_req = *req;
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		return __ablk_encrypt(req);
-	}
-}
-EXPORT_SYMBOL_GPL(ablk_encrypt);
-
-int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!may_use_simd() ||
-	    (in_atomic() && cryptd_ablkcipher_queued(ctx->cryptd_tfm))) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-
-		*cryptd_req = *req;
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-EXPORT_SYMBOL_GPL(ablk_decrypt);
-
-void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-EXPORT_SYMBOL_GPL(ablk_exit);
-
-int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
-					     CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ablk_init_common);
-
-int ablk_init(struct crypto_tfm *tfm)
-{
-	char drv_name[CRYPTO_MAX_ALG_NAME];
-
-	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
-					crypto_tfm_alg_driver_name(tfm));
-
-	return ablk_init_common(tfm, drv_name);
-}
-EXPORT_SYMBOL_GPL(ablk_init);
-
-MODULE_LICENSE("GPL");

+ 6 - 19
crypto/ahash.c

@@ -92,13 +92,14 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err)
 
 	if (nbytes && walk->offset & alignmask && !err) {
 		walk->offset = ALIGN(walk->offset, alignmask + 1);
-		walk->data += walk->offset;
-
 		nbytes = min(nbytes,
 			     ((unsigned int)(PAGE_SIZE)) - walk->offset);
 		walk->entrylen -= nbytes;
 
-		return nbytes;
+		if (nbytes) {
+			walk->data += walk->offset;
+			return nbytes;
+		}
 	}
 
 	if (walk->flags & CRYPTO_ALG_ASYNC)
@@ -446,24 +447,12 @@ static int ahash_def_finup(struct ahash_request *req)
 	return ahash_def_finup_finish1(req, err);
 }
 
-static int ahash_no_export(struct ahash_request *req, void *out)
-{
-	return -ENOSYS;
-}
-
-static int ahash_no_import(struct ahash_request *req, const void *in)
-{
-	return -ENOSYS;
-}
-
 static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_ahash *hash = __crypto_ahash_cast(tfm);
 	struct ahash_alg *alg = crypto_ahash_alg(hash);
 
 	hash->setkey = ahash_nosetkey;
-	hash->export = ahash_no_export;
-	hash->import = ahash_no_import;
 
 	if (tfm->__crt_alg->cra_type != &crypto_ahash_type)
 		return crypto_init_shash_ops_async(tfm);
@@ -473,16 +462,14 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 	hash->final = alg->final;
 	hash->finup = alg->finup ?: ahash_def_finup;
 	hash->digest = alg->digest;
+	hash->export = alg->export;
+	hash->import = alg->import;
 
 	if (alg->setkey) {
 		hash->setkey = alg->setkey;
 		if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
 			crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 	}
-	if (alg->export)
-		hash->export = alg->export;
-	if (alg->import)
-		hash->import = alg->import;
 
 	return 0;
 }

+ 0 - 8
crypto/algapi.c

@@ -543,9 +543,6 @@ int crypto_register_instance(struct crypto_template *tmpl,
 	inst->alg.cra_module = tmpl->module;
 	inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
 
-	if (unlikely(!crypto_mod_get(&inst->alg)))
-		return -EAGAIN;
-
 	down_write(&crypto_alg_sem);
 
 	larval = __crypto_register_alg(&inst->alg);
@@ -563,14 +560,9 @@ unlock:
 		goto err;
 
 	crypto_wait_for_test(larval);
-
-	/* Remove instance if test failed */
-	if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED))
-		crypto_unregister_instance(inst);
 	err = 0;
 
 err:
-	crypto_mod_put(&inst->alg);
 	return err;
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);

+ 16 - 18
crypto/api.c

@@ -193,17 +193,24 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	return alg;
 }
 
-struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask)
+static struct crypto_alg *crypto_alg_lookup(const char *name, u32 type,
+					    u32 mask)
 {
 	struct crypto_alg *alg;
+	u32 test = 0;
+
+	if (!((type | mask) & CRYPTO_ALG_TESTED))
+		test |= CRYPTO_ALG_TESTED;
 
 	down_read(&crypto_alg_sem);
-	alg = __crypto_alg_lookup(name, type, mask);
+	alg = __crypto_alg_lookup(name, type | test, mask | test);
+	if (!alg && test)
+		alg = __crypto_alg_lookup(name, type, mask) ?
+		      ERR_PTR(-ELIBBAD) : NULL;
 	up_read(&crypto_alg_sem);
 
 	return alg;
 }
-EXPORT_SYMBOL_GPL(crypto_alg_lookup);
 
 static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 					       u32 mask)
@@ -227,10 +234,12 @@ static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 		alg = crypto_alg_lookup(name, type, mask);
 	}
 
-	if (alg)
-		return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg;
+	if (!IS_ERR_OR_NULL(alg) && crypto_is_larval(alg))
+		alg = crypto_larval_wait(alg);
+	else if (!alg)
+		alg = crypto_larval_add(name, type, mask);
 
-	return crypto_larval_add(name, type, mask);
+	return alg;
 }
 
 int crypto_probing_notify(unsigned long val, void *v)
@@ -253,11 +262,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
 	struct crypto_alg *larval;
 	int ok;
 
-	if (!((type | mask) & CRYPTO_ALG_TESTED)) {
-		type |= CRYPTO_ALG_TESTED;
-		mask |= CRYPTO_ALG_TESTED;
-	}
-
 	/*
 	 * If the internal flag is set for a cipher, require a caller to
 	 * to invoke the cipher with the internal flag to use that cipher.
@@ -485,20 +489,14 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
 				   const struct crypto_type *frontend,
 				   u32 type, u32 mask)
 {
-	struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask) =
-		crypto_alg_mod_lookup;
-
 	if (frontend) {
 		type &= frontend->maskclear;
 		mask &= frontend->maskclear;
 		type |= frontend->type;
 		mask |= frontend->maskset;
-
-		if (frontend->lookup)
-			lookup = frontend->lookup;
 	}
 
-	return lookup(alg_name, type, mask);
+	return crypto_alg_mod_lookup(alg_name, type, mask);
 }
 EXPORT_SYMBOL_GPL(crypto_find_alg);
 

+ 353 - 0
crypto/cfb.c

@@ -0,0 +1,353 @@
+//SPDX-License-Identifier: GPL-2.0
+/*
+ * CFB: Cipher FeedBack mode
+ *
+ * Copyright (c) 2018 James.Bottomley@HansenPartnership.com
+ *
+ * CFB is a stream cipher mode which is layered on to a block
+ * encryption scheme.  It works very much like a one time pad where
+ * the pad is generated initially from the encrypted IV and then
+ * subsequently from the encrypted previous block of ciphertext.  The
+ * pad is XOR'd into the plain text to get the final ciphertext.
+ *
+ * The scheme of CFB is best described by wikipedia:
+ *
+ * https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#CFB
+ *
+ * Note that since the pad for both encryption and decryption is
+ * generated by an encryption operation, CFB never uses the block
+ * decryption function.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+struct crypto_cfb_ctx {
+	struct crypto_cipher *child;
+};
+
+static unsigned int crypto_cfb_bsize(struct crypto_skcipher *tfm)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *child = ctx->child;
+
+	return crypto_cipher_blocksize(child);
+}
+
+static void crypto_cfb_encrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_cipher_encrypt_one(ctx->child, dst, src);
+}
+
+/* final encrypt and decrypt is the same */
+static void crypto_cfb_final(struct skcipher_walk *walk,
+			     struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	const unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	u8 tmp[bsize + alignmask];
+	u8 *stream = PTR_ALIGN(tmp + 0, alignmask + 1);
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+	unsigned int nbytes = walk->nbytes;
+
+	crypto_cfb_encrypt_one(tfm, iv, stream);
+	crypto_xor_cpy(dst, stream, src, nbytes);
+}
+
+static int crypto_cfb_encrypt_segment(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, dst);
+		crypto_xor(dst, src, bsize);
+		memcpy(iv, dst, bsize);
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_encrypt_inplace(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+	u8 tmp[bsize];
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, tmp);
+		crypto_xor(src, tmp, bsize);
+		iv = src;
+
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	unsigned int bsize = crypto_cfb_bsize(tfm);
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= bsize) {
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			err = crypto_cfb_encrypt_inplace(&walk, tfm);
+		else
+			err = crypto_cfb_encrypt_segment(&walk, tfm);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	if (walk.nbytes) {
+		crypto_cfb_final(&walk, tfm);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_cfb_decrypt_segment(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, dst);
+		crypto_xor(dst, iv, bsize);
+		iv = src;
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_decrypt_inplace(struct skcipher_walk *walk,
+				      struct crypto_skcipher *tfm)
+{
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+	u8 tmp[bsize];
+
+	do {
+		crypto_cfb_encrypt_one(tfm, iv, tmp);
+		memcpy(iv, src, bsize);
+		crypto_xor(src, tmp, bsize);
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static int crypto_cfb_decrypt_blocks(struct skcipher_walk *walk,
+				     struct crypto_skcipher *tfm)
+{
+	if (walk->src.virt.addr == walk->dst.virt.addr)
+		return crypto_cfb_decrypt_inplace(walk, tfm);
+	else
+		return crypto_cfb_decrypt_segment(walk, tfm);
+}
+
+static int crypto_cfb_setkey(struct crypto_skcipher *parent, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_cipher *child = ctx->child;
+	int err;
+
+	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int crypto_cfb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	const unsigned int bsize = crypto_cfb_bsize(tfm);
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes >= bsize) {
+		err = crypto_cfb_decrypt_blocks(&walk, tfm);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	if (walk.nbytes) {
+		crypto_cfb_final(&walk, tfm);
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	return err;
+}
+
+static int crypto_cfb_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	return 0;
+}
+
+static void crypto_cfb_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_cfb_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->child);
+}
+
+static void crypto_cfb_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
+	struct crypto_alg *alg;
+	u32 mask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
+	if (err)
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	algt = crypto_get_attr_type(tb);
+	err = PTR_ERR(algt);
+	if (IS_ERR(algt))
+		goto err_free_inst;
+
+	mask = CRYPTO_ALG_TYPE_MASK |
+		crypto_requires_off(algt->type, algt->mask,
+				    CRYPTO_ALG_NEED_FALLBACK);
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+	err = PTR_ERR(alg);
+	if (IS_ERR(alg))
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cfb", alg);
+	if (err)
+		goto err_drop_spawn;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	/* we're a stream cipher independent of the crypto cra_blocksize */
+	inst->alg.base.cra_blocksize = 1;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
+
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_cfb_ctx);
+
+	inst->alg.init = crypto_cfb_init_tfm;
+	inst->alg.exit = crypto_cfb_exit_tfm;
+
+	inst->alg.setkey = crypto_cfb_setkey;
+	inst->alg.encrypt = crypto_cfb_encrypt;
+	inst->alg.decrypt = crypto_cfb_decrypt;
+
+	inst->free = crypto_cfb_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
+	kfree(inst);
+	goto out;
+}
+
+static struct crypto_template crypto_cfb_tmpl = {
+	.name = "cfb",
+	.create = crypto_cfb_create,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_cfb_module_init(void)
+{
+	return crypto_register_template(&crypto_cfb_tmpl);
+}
+
+static void __exit crypto_cfb_module_exit(void)
+{
+	crypto_unregister_template(&crypto_cfb_tmpl);
+}
+
+module_init(crypto_cfb_module_init);
+module_exit(crypto_cfb_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CFB block cipher algorithm");
+MODULE_ALIAS_CRYPTO("cfb");

+ 164 - 137
crypto/crypto_engine.c

@@ -15,12 +15,49 @@
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <crypto/engine.h>
-#include <crypto/internal/hash.h>
 #include <uapi/linux/sched/types.h>
 #include "internal.h"
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
+/**
+ * crypto_finalize_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+static void crypto_finalize_request(struct crypto_engine *engine,
+			     struct crypto_async_request *req, int err)
+{
+	unsigned long flags;
+	bool finalize_cur_req = false;
+	int ret;
+	struct crypto_engine_ctx *enginectx;
+
+	spin_lock_irqsave(&engine->queue_lock, flags);
+	if (engine->cur_req == req)
+		finalize_cur_req = true;
+	spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+	if (finalize_cur_req) {
+		enginectx = crypto_tfm_ctx(req->tfm);
+		if (engine->cur_req_prepared &&
+		    enginectx->op.unprepare_request) {
+			ret = enginectx->op.unprepare_request(engine, req);
+			if (ret)
+				dev_err(engine->dev, "failed to unprepare request\n");
+		}
+		spin_lock_irqsave(&engine->queue_lock, flags);
+		engine->cur_req = NULL;
+		engine->cur_req_prepared = false;
+		spin_unlock_irqrestore(&engine->queue_lock, flags);
+	}
+
+	req->complete(req, err);
+
+	kthread_queue_work(engine->kworker, &engine->pump_requests);
+}
+
 /**
 /**
  * crypto_pump_requests - dequeue one request from engine queue to process
  * crypto_pump_requests - dequeue one request from engine queue to process
  * @engine: the hardware engine
  * @engine: the hardware engine
@@ -34,11 +71,10 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 				 bool in_kthread)
 				 bool in_kthread)
 {
 {
 	struct crypto_async_request *async_req, *backlog;
 	struct crypto_async_request *async_req, *backlog;
-	struct ahash_request *hreq;
-	struct ablkcipher_request *breq;
 	unsigned long flags;
 	unsigned long flags;
 	bool was_busy = false;
 	bool was_busy = false;
-	int ret, rtype;
+	int ret;
+	struct crypto_engine_ctx *enginectx;
 
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 	spin_lock_irqsave(&engine->queue_lock, flags);
 
 
@@ -94,7 +130,6 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
 
-	rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
 	/* Until here we get the request need to be encrypted successfully */
 	/* Until here we get the request need to be encrypted successfully */
 	if (!was_busy && engine->prepare_crypt_hardware) {
 	if (!was_busy && engine->prepare_crypt_hardware) {
 		ret = engine->prepare_crypt_hardware(engine);
 		ret = engine->prepare_crypt_hardware(engine);
@@ -104,57 +139,31 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		}
 		}
 	}
 	}
 
 
-	switch (rtype) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		hreq = ahash_request_cast(engine->cur_req);
-		if (engine->prepare_hash_request) {
-			ret = engine->prepare_hash_request(engine, hreq);
-			if (ret) {
-				dev_err(engine->dev, "failed to prepare request: %d\n",
-					ret);
-				goto req_err;
-			}
-			engine->cur_req_prepared = true;
-		}
-		ret = engine->hash_one_request(engine, hreq);
-		if (ret) {
-			dev_err(engine->dev, "failed to hash one request from queue\n");
-			goto req_err;
-		}
-		return;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		breq = ablkcipher_request_cast(engine->cur_req);
-		if (engine->prepare_cipher_request) {
-			ret = engine->prepare_cipher_request(engine, breq);
-			if (ret) {
-				dev_err(engine->dev, "failed to prepare request: %d\n",
-					ret);
-				goto req_err;
-			}
-			engine->cur_req_prepared = true;
-		}
-		ret = engine->cipher_one_request(engine, breq);
+	enginectx = crypto_tfm_ctx(async_req->tfm);
+
+	if (enginectx->op.prepare_request) {
+		ret = enginectx->op.prepare_request(engine, async_req);
 		if (ret) {
 		if (ret) {
-			dev_err(engine->dev, "failed to cipher one request from queue\n");
+			dev_err(engine->dev, "failed to prepare request: %d\n",
+				ret);
 			goto req_err;
 			goto req_err;
 		}
 		}
-		return;
-	default:
-		dev_err(engine->dev, "failed to prepare request of unknown type\n");
-		return;
+		engine->cur_req_prepared = true;
+	}
+	if (!enginectx->op.do_one_request) {
+		dev_err(engine->dev, "failed to do request\n");
+		ret = -EINVAL;
+		goto req_err;
 	}
 	}
+	ret = enginectx->op.do_one_request(engine, async_req);
+	if (ret) {
+		dev_err(engine->dev, "Failed to do one request from queue: %d\n", ret);
+		goto req_err;
+	}
+	return;
 
 
 req_err:
 req_err:
-	switch (rtype) {
-	case CRYPTO_ALG_TYPE_AHASH:
-		hreq = ahash_request_cast(engine->cur_req);
-		crypto_finalize_hash_request(engine, hreq, ret);
-		break;
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		breq = ablkcipher_request_cast(engine->cur_req);
-		crypto_finalize_cipher_request(engine, breq, ret);
-		break;
-	}
+	crypto_finalize_request(engine, async_req, ret);
 	return;
 	return;
 
 
 out:
 out:
@@ -170,13 +179,12 @@ static void crypto_pump_work(struct kthread_work *work)
 }
 }
 
 
 /**
 /**
- * crypto_transfer_cipher_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_request - transfer the new request into the engine queue
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  * @req: the request need to be listed into the engine queue
  */
  */
-int crypto_transfer_cipher_request(struct crypto_engine *engine,
-				   struct ablkcipher_request *req,
+static int crypto_transfer_request(struct crypto_engine *engine,
+				   struct crypto_async_request *req,
 				   bool need_pump)
 				   bool need_pump)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
@@ -189,7 +197,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 		return -ESHUTDOWN;
 		return -ESHUTDOWN;
 	}
 	}
 
 
-	ret = ablkcipher_enqueue_request(&engine->queue, req);
+	ret = crypto_enqueue_request(&engine->queue, req);
 
 
 	if (!engine->busy && need_pump)
 	if (!engine->busy && need_pump)
 		kthread_queue_work(engine->kworker, &engine->pump_requests);
 		kthread_queue_work(engine->kworker, &engine->pump_requests);
@@ -197,102 +205,131 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	return ret;
 	return ret;
 }
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
 
 
 /**
 /**
- * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * crypto_transfer_request_to_engine - transfer one request
  * into the engine queue
  * into the engine queue
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  * @req: the request need to be listed into the engine queue
  */
  */
-int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
-					     struct ablkcipher_request *req)
+static int crypto_transfer_request_to_engine(struct crypto_engine *engine,
+					     struct crypto_async_request *req)
 {
 {
-	return crypto_transfer_cipher_request(engine, req, true);
+	return crypto_transfer_request(engine, req, true);
 }
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
 
 
 /**
 /**
- * crypto_transfer_hash_request - transfer the new request into the
- * enginequeue
+ * crypto_transfer_ablkcipher_request_to_engine - transfer one ablkcipher_request
+ * into the engine queue
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  * @req: the request need to be listed into the engine queue
+ * TODO: Remove this function when skcipher conversion is finished
  */
  */
-int crypto_transfer_hash_request(struct crypto_engine *engine,
-				 struct ahash_request *req, bool need_pump)
+int crypto_transfer_ablkcipher_request_to_engine(struct crypto_engine *engine,
+						 struct ablkcipher_request *req)
 {
 {
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-
-	if (!engine->running) {
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-		return -ESHUTDOWN;
-	}
-
-	ret = ahash_enqueue_request(&engine->queue, req);
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_ablkcipher_request_to_engine);
 
 
-	if (!engine->busy && need_pump)
-		kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_transfer_aead_request_to_engine - transfer one aead_request
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request that needs to be queued to the engine
+ */
+int crypto_transfer_aead_request_to_engine(struct crypto_engine *engine,
+					   struct aead_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_aead_request_to_engine);
 
 
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-	return ret;
+/**
+ * crypto_transfer_akcipher_request_to_engine - transfer one akcipher_request
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request that needs to be queued to the engine
+ */
+int crypto_transfer_akcipher_request_to_engine(struct crypto_engine *engine,
+					       struct akcipher_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
 }
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_akcipher_request_to_engine);
 
 
 /**
 /**
- * crypto_transfer_hash_request_to_engine - transfer one request to list
- * into the engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one ahash_request
+ * into the engine queue
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  * @req: the request need to be listed into the engine queue
  */
  */
 int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
 int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
 					   struct ahash_request *req)
 					   struct ahash_request *req)
 {
 {
-	return crypto_transfer_hash_request(engine, req, true);
+	return crypto_transfer_request_to_engine(engine, &req->base);
 }
 }
 EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 
 
 /**
 /**
- * crypto_finalize_cipher_request - finalize one request if the request is done
+ * crypto_transfer_skcipher_request_to_engine - transfer one skcipher_request
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request that needs to be queued to the engine
+ */
+int crypto_transfer_skcipher_request_to_engine(struct crypto_engine *engine,
+					       struct skcipher_request *req)
+{
+	return crypto_transfer_request_to_engine(engine, &req->base);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_skcipher_request_to_engine);
+
+/**
+ * crypto_finalize_ablkcipher_request - finalize one ablkcipher_request if
+ * the request is done
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @req: the request need to be finalized
  * @err: error number
  * @err: error number
+ * TODO: Remove this function when skcipher conversion is finished
  */
  */
-void crypto_finalize_cipher_request(struct crypto_engine *engine,
-				    struct ablkcipher_request *req, int err)
+void crypto_finalize_ablkcipher_request(struct crypto_engine *engine,
+					struct ablkcipher_request *req, int err)
 {
 {
-	unsigned long flags;
-	bool finalize_cur_req = false;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == &req->base)
-		finalize_cur_req = true;
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-
-	if (finalize_cur_req) {
-		if (engine->cur_req_prepared &&
-		    engine->unprepare_cipher_request) {
-			ret = engine->unprepare_cipher_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-		spin_lock_irqsave(&engine->queue_lock, flags);
-		engine->cur_req = NULL;
-		engine->cur_req_prepared = false;
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-	}
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_ablkcipher_request);
 
 
-	req->base.complete(&req->base, err);
+/**
+ * crypto_finalize_aead_request - finalize one aead_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request that needs to be finalized
+ * @err: error number
+ */
+void crypto_finalize_aead_request(struct crypto_engine *engine,
+				  struct aead_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_aead_request);
 
 
-	kthread_queue_work(engine->kworker, &engine->pump_requests);
+/**
+ * crypto_finalize_akcipher_request - finalize one akcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request that needs to be finalized
+ * @err: error number
+ */
+void crypto_finalize_akcipher_request(struct crypto_engine *engine,
+				      struct akcipher_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
 }
 }
-EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_akcipher_request);
 
 
 /**
 /**
- * crypto_finalize_hash_request - finalize one request if the request is done
+ * crypto_finalize_hash_request - finalize one ahash_request if
+ * the request is done
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @req: the request need to be finalized
  * @err: error number
  * @err: error number
@@ -300,34 +337,24 @@ EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
 void crypto_finalize_hash_request(struct crypto_engine *engine,
 void crypto_finalize_hash_request(struct crypto_engine *engine,
 				  struct ahash_request *req, int err)
 				  struct ahash_request *req, int err)
 {
 {
-	unsigned long flags;
-	bool finalize_cur_req = false;
-	int ret;
-
-	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == &req->base)
-		finalize_cur_req = true;
-	spin_unlock_irqrestore(&engine->queue_lock, flags);
-
-	if (finalize_cur_req) {
-		if (engine->cur_req_prepared &&
-		    engine->unprepare_hash_request) {
-			ret = engine->unprepare_hash_request(engine, req);
-			if (ret)
-				dev_err(engine->dev, "failed to unprepare request\n");
-		}
-		spin_lock_irqsave(&engine->queue_lock, flags);
-		engine->cur_req = NULL;
-		engine->cur_req_prepared = false;
-		spin_unlock_irqrestore(&engine->queue_lock, flags);
-	}
-
-	req->base.complete(&req->base, err);
-
-	kthread_queue_work(engine->kworker, &engine->pump_requests);
+	return crypto_finalize_request(engine, &req->base, err);
 }
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
 
+/**
+ * crypto_finalize_skcipher_request - finalize one skcipher_request if
+ * the request is done
+ * @engine: the hardware engine
+ * @req: the request that needs to be finalized
+ * @err: error number
+ */
+void crypto_finalize_skcipher_request(struct crypto_engine *engine,
+				      struct skcipher_request *req, int err)
+{
+	return crypto_finalize_request(engine, &req->base, err);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_skcipher_request);
+
 /**
 /**
  * crypto_engine_start - start the hardware engine
  * crypto_engine_start - start the hardware engine
  * @engine: the hardware engine need to be started
  * @engine: the hardware engine need to be started
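With the engine now dispatching through the per-transform struct crypto_engine_ctx that crypto_finalize_request() and crypto_pump_requests() recover via crypto_tfm_ctx(req->tfm), a driver has to place that context at the start of its transform context and fill in the op callbacks. The sketch below is illustrative only, not part of the patch: the callback prototypes are assumed to match the void *areq form used above, and all my_* names (including the my_engine handle) are hypothetical.

#include <crypto/engine.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>

static struct crypto_engine *my_engine;	/* from crypto_engine_alloc_init() */

struct my_drv_ctx {
	struct crypto_engine_ctx enginectx;	/* must stay the first member */
	/* device-specific keys/state would follow */
};

static int my_do_one_request(struct crypto_engine *engine, void *areq)
{
	struct skcipher_request *req =
		container_of(areq, struct skcipher_request, base);

	/* program the hardware with req here; on completion (e.g. from the
	 * IRQ handler) call crypto_finalize_skcipher_request(engine, req, err)
	 */
	return 0;
}

static int my_init_tfm(struct crypto_skcipher *tfm)
{
	struct my_drv_ctx *ctx = crypto_skcipher_ctx(tfm);

	ctx->enginectx.op.prepare_request = NULL;	/* optional hooks */
	ctx->enginectx.op.unprepare_request = NULL;
	ctx->enginectx.op.do_one_request = my_do_one_request;
	return 0;
}

static int my_encrypt(struct skcipher_request *req)
{
	return crypto_transfer_skcipher_request_to_engine(my_engine, req);
}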

+ 1 - 1
crypto/crypto_user.c

@@ -271,7 +271,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		return -ENOENT;

 	err = -ENOMEM;
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!skb)
 		goto drop_alg;


+ 17 - 6
crypto/ecc.c

@@ -1025,9 +1025,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 {
 {
 	int ret = 0;
 	int ret = 0;
 	struct ecc_point *product, *pk;
 	struct ecc_point *product, *pk;
-	u64 priv[ndigits];
-	u64 rand_z[ndigits];
-	unsigned int nbytes;
+	u64 *priv, *rand_z;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
 
 	if (!private_key || !public_key || !curve) {
 	if (!private_key || !public_key || !curve) {
@@ -1035,14 +1033,22 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto out;
 		goto out;
 	}
 	}
 
 
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	priv = kmalloc_array(ndigits, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 
-	get_random_bytes(rand_z, nbytes);
+	rand_z = kmalloc_array(ndigits, sizeof(*rand_z), GFP_KERNEL);
+	if (!rand_z) {
+		ret = -ENOMEM;
+		goto kfree_out;
+	}
 
 
 	pk = ecc_alloc_point(ndigits);
 	pk = ecc_alloc_point(ndigits);
 	if (!pk) {
 	if (!pk) {
 		ret = -ENOMEM;
 		ret = -ENOMEM;
-		goto out;
+		goto kfree_out;
 	}
 	}
 
 
 	product = ecc_alloc_point(ndigits);
 	product = ecc_alloc_point(ndigits);
@@ -1051,6 +1057,8 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto err_alloc_product;
 		goto err_alloc_product;
 	}
 	}
 
 
+	get_random_bytes(rand_z, ndigits << ECC_DIGITS_TO_BYTES_SHIFT);
+
 	ecc_swap_digits(public_key, pk->x, ndigits);
 	ecc_swap_digits(public_key, pk->x, ndigits);
 	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
 	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
 	ecc_swap_digits(private_key, priv, ndigits);
 	ecc_swap_digits(private_key, priv, ndigits);
@@ -1065,6 +1073,9 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 	ecc_free_point(product);
 	ecc_free_point(product);
 err_alloc_product:
 err_alloc_product:
 	ecc_free_point(pk);
 	ecc_free_point(pk);
+kfree_out:
+	kzfree(priv);
+	kzfree(rand_z);
 out:
 out:
 	return ret;
 	return ret;
 }
 }

+ 17 - 6
crypto/ecdh.c

@@ -89,12 +89,19 @@ static int ecdh_compute_value(struct kpp_request *req)
 		if (!shared_secret)
 		if (!shared_secret)
 			goto free_pubkey;
 			goto free_pubkey;
 
 
-		copied = sg_copy_to_buffer(req->src, 1, public_key,
-					   public_key_sz);
-		if (copied != public_key_sz) {
-			ret = -EINVAL;
+		/* from here on it's invalid parameters */
+		ret = -EINVAL;
+
+		/* must have exactly two points to be on the curve */
+		if (public_key_sz != req->src_len)
+			goto free_all;
+
+		copied = sg_copy_to_buffer(req->src,
+					   sg_nents_for_len(req->src,
+							    public_key_sz),
+					   public_key, public_key_sz);
+		if (copied != public_key_sz)
 			goto free_all;
 			goto free_all;
-		}
 
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
 						ctx->private_key, public_key,
 						ctx->private_key, public_key,
@@ -111,7 +118,11 @@ static int ecdh_compute_value(struct kpp_request *req)
 	if (ret < 0)
 	if (ret < 0)
 		goto free_all;
 		goto free_all;
 
 
-	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
+	/* might want less than we've got */
+	nbytes = min_t(size_t, nbytes, req->dst_len);
+	copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+								nbytes),
+				     buf, nbytes);
 	if (copied != nbytes)
 	if (copied != nbytes)
 		ret = -EINVAL;
 		ret = -EINVAL;
 
 

+ 0 - 1
crypto/internal.h

@@ -67,7 +67,6 @@ static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg)
 }

 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg);
-struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask);
 struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);

 int crypto_init_cipher_ops(struct crypto_tfm *tfm);

+ 40 - 114
crypto/lrw.c

@@ -28,13 +28,31 @@
 
 
 #include <crypto/b128ops.h>
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/gf128mul.h>
-#include <crypto/lrw.h>
 
 
 #define LRW_BUFFER_SIZE 128u
 #define LRW_BUFFER_SIZE 128u
 
 
+#define LRW_BLOCK_SIZE 16
+
 struct priv {
 struct priv {
 	struct crypto_skcipher *child;
 	struct crypto_skcipher *child;
-	struct lrw_table_ctx table;
+
+	/*
+	 * optimizes multiplying a random (non incrementing, as at the
+	 * start of a new sector) value with key2, we could also have
+	 * used 4k optimization tables or no optimization at all. In the
+	 * latter case we would have to store key2 here
+	 */
+	struct gf128mul_64k *table;
+
+	/*
+	 * stores:
+	 *  key2*{ 0,0,...0,0,0,0,1 }, key2*{ 0,0,...0,0,0,1,1 },
+	 *  key2*{ 0,0,...0,0,1,1,1 }, key2*{ 0,0,...0,1,1,1,1 }
+	 *  key2*{ 0,0,...1,1,1,1,1 }, etc
+	 * needed for optimized multiplication of incrementing values
+	 * with key2
+	 */
+	be128 mulinc[128];
 };
 };
 
 
 struct rctx {
 struct rctx {
@@ -65,11 +83,25 @@ static inline void setbit128_bbe(void *b, int bit)
 			), b);
 			), b);
 }
 }
 
 
-int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak)
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
+		  unsigned int keylen)
 {
 {
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
+	int err, bsize = LRW_BLOCK_SIZE;
+	const u8 *tweak = key + keylen - bsize;
 	be128 tmp = { 0 };
 	be128 tmp = { 0 };
 	int i;
 	int i;
 
 
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
+
 	if (ctx->table)
 	if (ctx->table)
 		gf128mul_free_64k(ctx->table);
 		gf128mul_free_64k(ctx->table);
 
 
@@ -87,34 +119,6 @@ int lrw_init_table(struct lrw_table_ctx *ctx, const u8 *tweak)
 
 
 	return 0;
 	return 0;
 }
 }
-EXPORT_SYMBOL_GPL(lrw_init_table);
-
-void lrw_free_table(struct lrw_table_ctx *ctx)
-{
-	if (ctx->table)
-		gf128mul_free_64k(ctx->table);
-}
-EXPORT_SYMBOL_GPL(lrw_free_table);
-
-static int setkey(struct crypto_skcipher *parent, const u8 *key,
-		  unsigned int keylen)
-{
-	struct priv *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_skcipher *child = ctx->child;
-	int err, bsize = LRW_BLOCK_SIZE;
-	const u8 *tweak = key + keylen - bsize;
-
-	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
-					 CRYPTO_TFM_REQ_MASK);
-	err = crypto_skcipher_setkey(child, key, keylen - bsize);
-	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
-					  CRYPTO_TFM_RES_MASK);
-	if (err)
-		return err;
-
-	return lrw_init_table(&ctx->table, tweak);
-}
 
 
 static inline void inc(be128 *iv)
 static inline void inc(be128 *iv)
 {
 {
@@ -238,7 +242,7 @@ static int pre_crypt(struct skcipher_request *req)
 			/* T <- I*Key2, using the optimization
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
 			 * discussed in the specification */
 			be128_xor(&rctx->t, &rctx->t,
 			be128_xor(&rctx->t, &rctx->t,
-				  &ctx->table.mulinc[get_index128(iv)]);
+				  &ctx->mulinc[get_index128(iv)]);
 			inc(iv);
 			inc(iv);
 		} while ((avail -= bs) >= bs);
 		} while ((avail -= bs) >= bs);
 
 
@@ -301,7 +305,7 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
 	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
 	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
 
 
 	/* T <- I*Key2 */
 	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&rctx->t, ctx->table.table);
+	gf128mul_64k_bbe(&rctx->t, ctx->table);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -313,7 +317,7 @@ static void exit_crypt(struct skcipher_request *req)
 	rctx->left = 0;
 	rctx->left = 0;
 
 
 	if (rctx->ext)
 	if (rctx->ext)
-		kfree(rctx->ext);
+		kzfree(rctx->ext);
 }
 }
 
 
 static int do_encrypt(struct skcipher_request *req, int err)
 static int do_encrypt(struct skcipher_request *req, int err)
@@ -416,85 +420,6 @@ static int decrypt(struct skcipher_request *req)
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 }
 
 
-int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
-	      struct scatterlist *ssrc, unsigned int nbytes,
-	      struct lrw_crypt_req *req)
-{
-	const unsigned int bsize = LRW_BLOCK_SIZE;
-	const unsigned int max_blks = req->tbuflen / bsize;
-	struct lrw_table_ctx *ctx = req->table_ctx;
-	struct blkcipher_walk walk;
-	unsigned int nblocks;
-	be128 *iv, *src, *dst, *t;
-	be128 *t_buf = req->tbuf;
-	int err, i;
-
-	BUG_ON(max_blks < 1);
-
-	blkcipher_walk_init(&walk, sdst, ssrc, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	nblocks = min(walk.nbytes / bsize, max_blks);
-	src = (be128 *)walk.src.virt.addr;
-	dst = (be128 *)walk.dst.virt.addr;
-
-	/* calculate first value of T */
-	iv = (be128 *)walk.iv;
-	t_buf[0] = *iv;
-
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&t_buf[0], ctx->table);
-
-	i = 0;
-	goto first;
-
-	for (;;) {
-		do {
-			for (i = 0; i < nblocks; i++) {
-				/* T <- I*Key2, using the optimization
-				 * discussed in the specification */
-				be128_xor(&t_buf[i], t,
-						&ctx->mulinc[get_index128(iv)]);
-				inc(iv);
-first:
-				t = &t_buf[i];
-
-				/* PP <- T xor P */
-				be128_xor(dst + i, t, src + i);
-			}
-
-			/* CC <- E(Key2,PP) */
-			req->crypt_fn(req->crypt_ctx, (u8 *)dst,
-				      nblocks * bsize);
-
-			/* C <- T xor CC */
-			for (i = 0; i < nblocks; i++)
-				be128_xor(dst + i, dst + i, &t_buf[i]);
-
-			src += nblocks;
-			dst += nblocks;
-			nbytes -= nblocks * bsize;
-			nblocks = min(nbytes / bsize, max_blks);
-		} while (nblocks > 0);
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-		if (!nbytes)
-			break;
-
-		nblocks = min(nbytes / bsize, max_blks);
-		src = (be128 *)walk.src.virt.addr;
-		dst = (be128 *)walk.dst.virt.addr;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(lrw_crypt);
-
 static int init_tfm(struct crypto_skcipher *tfm)
 static int init_tfm(struct crypto_skcipher *tfm)
 {
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
@@ -518,7 +443,8 @@ static void exit_tfm(struct crypto_skcipher *tfm)
 {
 {
 	struct priv *ctx = crypto_skcipher_ctx(tfm);
 	struct priv *ctx = crypto_skcipher_ctx(tfm);
 
 
-	lrw_free_table(&ctx->table);
+	if (ctx->table)
+		gf128mul_free_64k(ctx->table);
 	crypto_free_skcipher(ctx->child);
 	crypto_free_skcipher(ctx->child);
 }
 }
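A short worked note on the table fields moved into struct priv above (illustration only, not from the patch): the per-block tweak is T = I * key2 in GF(2^128), computed once per request with the 64k table. When the block counter I is then incremented, the bits that flip always form a contiguous run of low-order ones, so

	key2 * (I + 1) = (key2 * I) xor (key2 * ((I + 1) xor I))

and (I + 1) xor I can only take the 128 values 0...01, 0...011, 0...0111, and so on. mulinc[] caches key2 times each of those values, and pre_crypt() selects the right entry with get_index128(), avoiding a full table multiplication per block.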
 
 

+ 4 - 30
crypto/mcryptd.c

@@ -367,7 +367,7 @@ static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
 		goto out;
 		goto out;
 
 
 	rctx->out = req->result;
 	rctx->out = req->result;
-	err = ahash_mcryptd_update(&rctx->areq);
+	err = crypto_ahash_update(&rctx->areq);
 	if (err) {
 	if (err) {
 		req->base.complete = rctx->complete;
 		req->base.complete = rctx->complete;
 		goto out;
 		goto out;
@@ -394,7 +394,7 @@ static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
 		goto out;
 		goto out;
 
 
 	rctx->out = req->result;
 	rctx->out = req->result;
-	err = ahash_mcryptd_final(&rctx->areq);
+	err = crypto_ahash_final(&rctx->areq);
 	if (err) {
 	if (err) {
 		req->base.complete = rctx->complete;
 		req->base.complete = rctx->complete;
 		goto out;
 		goto out;
@@ -420,7 +420,7 @@ static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
 	if (unlikely(err == -EINPROGRESS))
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 		goto out;
 	rctx->out = req->result;
 	rctx->out = req->result;
-	err = ahash_mcryptd_finup(&rctx->areq);
+	err = crypto_ahash_finup(&rctx->areq);
 
 
 	if (err) {
 	if (err) {
 		req->base.complete = rctx->complete;
 		req->base.complete = rctx->complete;
@@ -455,7 +455,7 @@ static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
 						rctx->complete, req_async);
 						rctx->complete, req_async);
 
 
 	rctx->out = req->result;
 	rctx->out = req->result;
-	err = ahash_mcryptd_digest(desc);
+	err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
 
 
 out:
 out:
 	local_bh_disable();
 	local_bh_disable();
@@ -612,32 +612,6 @@ struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
 }
 }
 EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 
 
-int ahash_mcryptd_digest(struct ahash_request *desc)
-{
-	return crypto_ahash_init(desc) ?: ahash_mcryptd_finup(desc);
-}
-
-int ahash_mcryptd_update(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_update(desc);
-}
-
-int ahash_mcryptd_finup(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_finup(desc);
-}
-
-int ahash_mcryptd_final(struct ahash_request *desc)
-{
-	/* alignment is to be done by multi-buffer crypto algorithm if needed */
-
-	return crypto_ahash_final(desc);
-}
-
 struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
 struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
 {
 {
 	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
 	struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);

+ 0 - 17
crypto/md4.c

@@ -64,23 +64,6 @@ static inline u32 H(u32 x, u32 y, u32 z)
 #define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
 #define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
 #define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
 #define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
 
 
-/* XXX: this stuff can be optimized */
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
 static void md4_transform(u32 *hash, u32 const *in)
 static void md4_transform(u32 *hash, u32 const *in)
 {
 {
 	u32 a, b, c, d;
 	u32 a, b, c, d;

+ 0 - 17
crypto/md5.c

@@ -32,23 +32,6 @@ const u8 md5_zero_message_hash[MD5_DIGEST_SIZE] = {
 };
 };
 EXPORT_SYMBOL_GPL(md5_zero_message_hash);
 EXPORT_SYMBOL_GPL(md5_zero_message_hash);
 
 
-/* XXX: this stuff can be optimized */
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		__cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
 #define F1(x, y, z)	(z ^ (x & (y ^ z)))
 #define F1(x, y, z)	(z ^ (x & (y ^ z)))
 #define F2(x, y, z)	F1(z, x, y)
 #define F2(x, y, z)	F1(z, x, y)
 #define F3(x, y, z)	(x ^ y ^ z)
 #define F3(x, y, z)	(x ^ y ^ z)

+ 1 - 1
crypto/rsa-pkcs1pad.c

@@ -192,7 +192,7 @@ static int pkcs1pad_encrypt_sign_complete(struct akcipher_request *req, int err)
 	if (likely(!pad_len))
 		goto out;

-	out_buf = kzalloc(ctx->key_size, GFP_ATOMIC);
+	out_buf = kzalloc(ctx->key_size, GFP_KERNEL);
 	err = -ENOMEM;
 	if (!out_buf)
 		goto out;

+ 50 - 0
crypto/simd.c

@@ -221,4 +221,54 @@ void simd_skcipher_free(struct simd_skcipher_alg *salg)
 }
 }
 EXPORT_SYMBOL_GPL(simd_skcipher_free);
 EXPORT_SYMBOL_GPL(simd_skcipher_free);
 
 
+int simd_register_skciphers_compat(struct skcipher_alg *algs, int count,
+				   struct simd_skcipher_alg **simd_algs)
+{
+	int err;
+	int i;
+	const char *algname;
+	const char *drvname;
+	const char *basename;
+	struct simd_skcipher_alg *simd;
+
+	err = crypto_register_skciphers(algs, count);
+	if (err)
+		return err;
+
+	for (i = 0; i < count; i++) {
+		WARN_ON(strncmp(algs[i].base.cra_name, "__", 2));
+		WARN_ON(strncmp(algs[i].base.cra_driver_name, "__", 2));
+		algname = algs[i].base.cra_name + 2;
+		drvname = algs[i].base.cra_driver_name + 2;
+		basename = algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto err_unregister;
+		simd_algs[i] = simd;
+	}
+	return 0;
+
+err_unregister:
+	simd_unregister_skciphers(algs, count, simd_algs);
+	return err;
+}
+EXPORT_SYMBOL_GPL(simd_register_skciphers_compat);
+
+void simd_unregister_skciphers(struct skcipher_alg *algs, int count,
+			       struct simd_skcipher_alg **simd_algs)
+{
+	int i;
+
+	crypto_unregister_skciphers(algs, count);
+
+	for (i = 0; i < count; i++) {
+		if (simd_algs[i]) {
+			simd_skcipher_free(simd_algs[i]);
+			simd_algs[i] = NULL;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(simd_unregister_skciphers);
+
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");

+ 244 - 0
crypto/sm4_generic.c

@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * SM4 Cipher Algorithm.
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * All rights reserved.
+ */
+
+#include <crypto/sm4.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+static const u32 fk[4] = {
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u8 sbox[256] = {
+	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+static const u32 ck[] = {
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static u32 sm4_t_non_lin_sub(u32 x)
+{
+	int i;
+	u8 *b = (u8 *)&x;
+
+	for (i = 0; i < 4; ++i)
+		b[i] = sbox[b[i]];
+
+	return x;
+}
+
+static u32 sm4_key_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 13) ^ rol32(x, 23);
+
+}
+
+static u32 sm4_enc_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
+}
+
+static u32 sm4_key_sub(u32 x)
+{
+	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static u32 sm4_enc_sub(u32 x)
+{
+	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static u32 sm4_round(const u32 *x, const u32 rk)
+{
+	return x[0] ^ sm4_enc_sub(x[1] ^ x[2] ^ x[3] ^ rk);
+}
+
+
+/**
+ * crypto_sm4_expand_key - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ */
+int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
+			  unsigned int key_len)
+{
+	u32 rk[4], t;
+	const u32 *key = (u32 *)in_key;
+	int i;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < 4; ++i)
+		rk[i] = get_unaligned_be32(&key[i]) ^ fk[i];
+
+	for (i = 0; i < 32; ++i) {
+		t = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i]);
+		ctx->rkey_enc[i] = t;
+		rk[0] = rk[1];
+		rk[1] = rk[2];
+		rk[2] = rk[3];
+		rk[3] = t;
+	}
+
+	for (i = 0; i < 32; ++i)
+		ctx->rkey_dec[i] = ctx->rkey_enc[31 - i];
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_sm4_expand_key);
+
+/**
+ * crypto_sm4_set_key - Set the SM4 key.
+ * @tfm:	The %crypto_tfm that is used in the context.
+ * @in_key:	The input key.
+ * @key_len:	The size of the key.
+ *
+ * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
+ * is set. The function uses crypto_sm4_expand_key() to expand the key.
+ * &crypto_sm4_ctx _must_ be the private data embedded in @tfm which is
+ * retrieved with crypto_tfm_ctx().
+ */
+int crypto_sm4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int ret;
+
+	ret = crypto_sm4_expand_key(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_sm4_set_key);
+
+static void sm4_do_crypt(const u32 *rk, u32 *out, const u32 *in)
+{
+	u32 x[4], i, t;
+
+	for (i = 0; i < 4; ++i)
+		x[i] = get_unaligned_be32(&in[i]);
+
+	for (i = 0; i < 32; ++i) {
+		t = sm4_round(x, rk[i]);
+		x[0] = x[1];
+		x[1] = x[2];
+		x[2] = x[3];
+		x[3] = t;
+	}
+
+	for (i = 0; i < 4; ++i)
+		put_unaligned_be32(x[3 - i], &out[i]);
+}
+
+/* encrypt a block of text */
+
+static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
+}
+
+/* decrypt a block of text */
+
+static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
+}
+
+static struct crypto_alg sm4_alg = {
+	.cra_name		=	"sm4",
+	.cra_driver_name	=	"sm4-generic",
+	.cra_priority		=	100,
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	SM4_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct crypto_sm4_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	SM4_KEY_SIZE,
+			.cia_max_keysize	=	SM4_KEY_SIZE,
+			.cia_setkey		=	crypto_sm4_set_key,
+			.cia_encrypt		=	sm4_encrypt,
+			.cia_decrypt		=	sm4_decrypt
+		}
+	}
+};
+
+static int __init sm4_init(void)
+{
+	return crypto_register_alg(&sm4_alg);
+}
+
+static void __exit sm4_fini(void)
+{
+	crypto_unregister_alg(&sm4_alg);
+}
+
+module_init(sm4_init);
+module_exit(sm4_fini);
+
+MODULE_DESCRIPTION("SM4 Cipher Algorithm");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-generic");

+ 307 - 0
crypto/speck.c

@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Speck: a lightweight block cipher
+ *
+ * Copyright (c) 2018 Google, Inc
+ *
+ * Speck has 10 variants, including 5 block sizes.  For now we only implement
+ * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
+ * Speck64/128.   Speck${B}/${K} denotes the variant with a block size of B bits
+ * and a key size of K bits.  The Speck128 variants are believed to be the most
+ * secure variants, and they use the same block size and key sizes as AES.  The
+ * Speck64 variants are less secure, but on 32-bit processors are usually
+ * faster.  The remaining variants (Speck32, Speck48, and Speck96) are even less
+ * secure and/or not as well suited for implementation on either 32-bit or
+ * 64-bit processors, so are omitted.
+ *
+ * Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
+ * https://eprint.iacr.org/2013/404.pdf
+ *
+ * In a correspondence, the Speck designers have also clarified that the words
+ * should be interpreted in little-endian format, and the words should be
+ * ordered such that the first word of each block is 'y' rather than 'x', and
+ * the first key word (rather than the last) becomes the first round key.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/speck.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+/* Speck128 */
+
+static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
+{
+	*x = ror64(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol64(*y, 3);
+	*y ^= *x;
+}
+
+static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
+{
+	*y ^= *x;
+	*y = ror64(*y, 3);
+	*x ^= k;
+	*x -= *y;
+	*x = rol64(*x, 8);
+}
+
+void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in)
+{
+	u64 y = get_unaligned_le64(in);
+	u64 x = get_unaligned_le64(in + 8);
+	int i;
+
+	for (i = 0; i < ctx->nrounds; i++)
+		speck128_round(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le64(y, out);
+	put_unaligned_le64(x, out + 8);
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
+
+static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
+			     u8 *out, const u8 *in)
+{
+	u64 y = get_unaligned_le64(in);
+	u64 x = get_unaligned_le64(in + 8);
+	int i;
+
+	for (i = ctx->nrounds - 1; i >= 0; i--)
+		speck128_unround(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le64(y, out);
+	put_unaligned_le64(x, out + 8);
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
+
+static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
+			   unsigned int keylen)
+{
+	u64 l[3];
+	u64 k;
+	int i;
+
+	switch (keylen) {
+	case SPECK128_128_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		ctx->nrounds = SPECK128_128_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[0], &k, i);
+		}
+		break;
+	case SPECK128_192_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		l[1] = get_unaligned_le64(key + 16);
+		ctx->nrounds = SPECK128_192_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[i % 2], &k, i);
+		}
+		break;
+	case SPECK128_256_KEY_SIZE:
+		k = get_unaligned_le64(key);
+		l[0] = get_unaligned_le64(key + 8);
+		l[1] = get_unaligned_le64(key + 16);
+		l[2] = get_unaligned_le64(key + 24);
+		ctx->nrounds = SPECK128_256_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck128_round(&l[i % 3], &k, i);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
+
+static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
+
+/* Speck64 */
+
+static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
+{
+	*x = ror32(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol32(*y, 3);
+	*y ^= *x;
+}
+
+static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
+{
+	*y ^= *x;
+	*y = ror32(*y, 3);
+	*x ^= k;
+	*x -= *y;
+	*x = rol32(*x, 8);
+}
+
+void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in)
+{
+	u32 y = get_unaligned_le32(in);
+	u32 x = get_unaligned_le32(in + 4);
+	int i;
+
+	for (i = 0; i < ctx->nrounds; i++)
+		speck64_round(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le32(y, out);
+	put_unaligned_le32(x, out + 4);
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
+
+static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
+			    u8 *out, const u8 *in)
+{
+	u32 y = get_unaligned_le32(in);
+	u32 x = get_unaligned_le32(in + 4);
+	int i;
+
+	for (i = ctx->nrounds - 1; i >= 0; i--)
+		speck64_unround(&x, &y, ctx->round_keys[i]);
+
+	put_unaligned_le32(y, out);
+	put_unaligned_le32(x, out + 4);
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
+
+static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
+}
+
+int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
+			  unsigned int keylen)
+{
+	u32 l[3];
+	u32 k;
+	int i;
+
+	switch (keylen) {
+	case SPECK64_96_KEY_SIZE:
+		k = get_unaligned_le32(key);
+		l[0] = get_unaligned_le32(key + 4);
+		l[1] = get_unaligned_le32(key + 8);
+		ctx->nrounds = SPECK64_96_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck64_round(&l[i % 2], &k, i);
+		}
+		break;
+	case SPECK64_128_KEY_SIZE:
+		k = get_unaligned_le32(key);
+		l[0] = get_unaligned_le32(key + 4);
+		l[1] = get_unaligned_le32(key + 8);
+		l[2] = get_unaligned_le32(key + 12);
+		ctx->nrounds = SPECK64_128_NROUNDS;
+		for (i = 0; i < ctx->nrounds; i++) {
+			ctx->round_keys[i] = k;
+			speck64_round(&l[i % 3], &k, i);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
+
+static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
+			  unsigned int keylen)
+{
+	return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
+
+/* Algorithm definitions */
+
+static struct crypto_alg speck_algs[] = {
+	{
+		.cra_name		= "speck128",
+		.cra_driver_name	= "speck128-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= SPECK128_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct speck128_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.cipher = {
+				.cia_min_keysize	= SPECK128_128_KEY_SIZE,
+				.cia_max_keysize	= SPECK128_256_KEY_SIZE,
+				.cia_setkey		= speck128_setkey,
+				.cia_encrypt		= speck128_encrypt,
+				.cia_decrypt		= speck128_decrypt
+			}
+		}
+	}, {
+		.cra_name		= "speck64",
+		.cra_driver_name	= "speck64-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+		.cra_blocksize		= SPECK64_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct speck64_tfm_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_u			= {
+			.cipher = {
+				.cia_min_keysize	= SPECK64_96_KEY_SIZE,
+				.cia_max_keysize	= SPECK64_128_KEY_SIZE,
+				.cia_setkey		= speck64_setkey,
+				.cia_encrypt		= speck64_encrypt,
+				.cia_decrypt		= speck64_decrypt
+			}
+		}
+	}
+};
+
+static int __init speck_module_init(void)
+{
+	return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+static void __exit speck_module_exit(void)
+{
+	crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
+}
+
+module_init(speck_module_init);
+module_exit(speck_module_exit);
+
+MODULE_DESCRIPTION("Speck block cipher (generic)");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("speck128");
+MODULE_ALIAS_CRYPTO("speck128-generic");
+MODULE_ALIAS_CRYPTO("speck64");
+MODULE_ALIAS_CRYPTO("speck64-generic");

+ 3 - 0
crypto/tcrypt.c

@@ -1983,6 +1983,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 	case 190:
 		ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))");
 		break;
+	case 191:
+		ret += tcrypt_test("ecb(sm4)");
+		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);

+ 45 - 0
crypto/testmgr.c

@@ -3000,6 +3000,33 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.dec = __VECS(serpent_dec_tv_template)
 				.dec = __VECS(serpent_dec_tv_template)
 			}
 			}
 		}
 		}
+	}, {
+		.alg = "ecb(sm4)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(sm4_enc_tv_template),
+				.dec = __VECS(sm4_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "ecb(speck128)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck128_enc_tv_template),
+				.dec = __VECS(speck128_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "ecb(speck64)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck64_enc_tv_template),
+				.dec = __VECS(speck64_dec_tv_template)
+			}
+		}
 	}, {
 	}, {
 		.alg = "ecb(tea)",
 		.alg = "ecb(tea)",
 		.test = alg_test_skcipher,
 		.test = alg_test_skcipher,
@@ -3557,6 +3584,24 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.dec = __VECS(serpent_xts_dec_tv_template)
 				.dec = __VECS(serpent_xts_dec_tv_template)
 			}
 			}
 		}
 		}
+	}, {
+		.alg = "xts(speck128)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck128_xts_enc_tv_template),
+				.dec = __VECS(speck128_xts_dec_tv_template)
+			}
+		}
+	}, {
+		.alg = "xts(speck64)",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = __VECS(speck64_xts_enc_tv_template),
+				.dec = __VECS(speck64_xts_dec_tv_template)
+			}
+		}
 	}, {
 	}, {
 		.alg = "xts(twofish)",
 		.alg = "xts(twofish)",
 		.test = alg_test_skcipher,
 		.test = alg_test_skcipher,

+ 3321 - 1445
crypto/testmgr.h

@@ -548,7 +548,7 @@ static const struct akcipher_testvec rsa_tv_template[] = {
 static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 	{
 	{
 	.key =
 	.key =
-	"\x30\x82\x03\x1f\x02\x01\x10\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
+	"\x30\x82\x03\x1f\x02\x01\x00\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
 	"\x8c\x92\x31\xe7\x69\x02\xa2\xd5\x5c\x78\xde\xa2\x0c\x8f\xfe\x28"
 	"\x8c\x92\x31\xe7\x69\x02\xa2\xd5\x5c\x78\xde\xa2\x0c\x8f\xfe\x28"
 	"\x59\x31\xdf\x40\x9c\x60\x61\x06\xb9\x2f\x62\x40\x80\x76\xcb\x67"
 	"\x59\x31\xdf\x40\x9c\x60\x61\x06\xb9\x2f\x62\x40\x80\x76\xcb\x67"
 	"\x4a\xb5\x59\x56\x69\x17\x07\xfa\xf9\x4c\xbd\x6c\x37\x7a\x46\x7d"
 	"\x4a\xb5\x59\x56\x69\x17\x07\xfa\xf9\x4c\xbd\x6c\x37\x7a\x46\x7d"
@@ -597,8 +597,8 @@ static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
 	"\xfe\xf8\x27\x1b\xd6\x55\x60\x5e\x48\xb7\x6d\x9a\xa8\x37\xf9\x7a"
 	"\xfe\xf8\x27\x1b\xd6\x55\x60\x5e\x48\xb7\x6d\x9a\xa8\x37\xf9\x7a"
 	"\xde\x1b\xcd\x5d\x1a\x30\xd4\xe9\x9e\x5b\x3c\x15\xf8\x9c\x1f\xda"
 	"\xde\x1b\xcd\x5d\x1a\x30\xd4\xe9\x9e\x5b\x3c\x15\xf8\x9c\x1f\xda"
 	"\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46"
 	"\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46"
-	"\xb8\x35\xdf\x41\x02\x01\x30\x02\x01\x30\x02\x01\x30\x02\x01\x30"
-	"\x02\x01\x30",
+	"\xb8\x35\xdf\x41\x02\x01\x00\x02\x01\x00\x02\x01\x00\x02\x01\x00"
+	"\x02\x01\x00",
 	.key_len = 804,
 	.key_len = 804,
 	/*
 	/*
 	 * m is SHA256 hash of following message:
 	 * m is SHA256 hash of following message:
@@ -2044,263 +2044,522 @@ static const struct hash_testvec crct10dif_tv_template[] = {
 		.digest		= (u8 *)(u16 []){ 0x44c6 },
 		.digest		= (u8 *)(u16 []){ 0x44c6 },
 		.np		= 4,
 		.np		= 4,
 		.tap		= { 1, 255, 57, 6 },
 		.tap		= { 1, 255, 57, 6 },
-	}
-};
-
-/* Example vectors below taken from
- * http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
- *
- * The rest taken from
- * https://github.com/adamws/oscca-sm3
- */
-static const struct hash_testvec sm3_tv_template[] = {
-	{
-		.plaintext = "",
-		.psize = 0,
-		.digest = (u8 *)(u8 []) {
-			0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F,
-			0x8e, 0x61, 0x19, 0x48, 0x31, 0xE8, 0x1A, 0x8F,
-			0x22, 0xBE, 0xC8, 0xC7, 0x28, 0xFE, 0xFB, 0x74,
-			0x7E, 0xD0, 0x35, 0xEB, 0x50, 0x82, 0xAA, 0x2B }
-	}, {
-		.plaintext = "a",
-		.psize = 1,
-		.digest = (u8 *)(u8 []) {
-			0x62, 0x34, 0x76, 0xAC, 0x18, 0xF6, 0x5A, 0x29,
-			0x09, 0xE4, 0x3C, 0x7F, 0xEC, 0x61, 0xB4, 0x9C,
-			0x7E, 0x76, 0x4A, 0x91, 0xA1, 0x8C, 0xCB, 0x82,
-			0xF1, 0x91, 0x7A, 0x29, 0xC8, 0x6C, 0x5E, 0x88 }
-	}, {
-		/* A.1. Example 1 */
-		.plaintext = "abc",
-		.psize = 3,
-		.digest = (u8 *)(u8 []) {
-			0x66, 0xC7, 0xF0, 0xF4, 0x62, 0xEE, 0xED, 0xD9,
-			0xD1, 0xF2, 0xD4, 0x6B, 0xDC, 0x10, 0xE4, 0xE2,
-			0x41, 0x67, 0xC4, 0x87, 0x5C, 0xF2, 0xF7, 0xA2,
-			0x29, 0x7D, 0xA0, 0x2B, 0x8F, 0x4B, 0xA8, 0xE0 }
-	}, {
-		.plaintext = "abcdefghijklmnopqrstuvwxyz",
-		.psize = 26,
-		.digest = (u8 *)(u8 []) {
-			0xB8, 0x0F, 0xE9, 0x7A, 0x4D, 0xA2, 0x4A, 0xFC,
-			0x27, 0x75, 0x64, 0xF6, 0x6A, 0x35, 0x9E, 0xF4,
-			0x40, 0x46, 0x2A, 0xD2, 0x8D, 0xCC, 0x6D, 0x63,
-			0xAD, 0xB2, 0x4D, 0x5C, 0x20, 0xA6, 0x15, 0x95 }
-	}, {
-		/* A.1. Example 2 */
-		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdab"
-			     "cdabcdabcdabcdabcd",
-		.psize = 64,
-		.digest = (u8 *)(u8 []) {
-			0xDE, 0xBE, 0x9F, 0xF9, 0x22, 0x75, 0xB8, 0xA1,
-			0x38, 0x60, 0x48, 0x89, 0xC1, 0x8E, 0x5A, 0x4D,
-			0x6F, 0xDB, 0x70, 0xE5, 0x38, 0x7E, 0x57, 0x65,
-			0x29, 0x3D, 0xCB, 0xA3, 0x9C, 0x0C, 0x57, 0x32 }
-	}, {
-		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
-			     "abcdabcdabcdabcdabcdabcdabcdabcd",
-		.psize = 256,
-		.digest = (u8 *)(u8 []) {
-			0xB9, 0x65, 0x76, 0x4C, 0x8B, 0xEB, 0xB0, 0x91,
-			0xC7, 0x60, 0x2B, 0x74, 0xAF, 0xD3, 0x4E, 0xEF,
-			0xB5, 0x31, 0xDC, 0xCB, 0x4E, 0x00, 0x76, 0xD9,
-			0xB7, 0xCD, 0x81, 0x31, 0x99, 0xB4, 0x59, 0x71 }
-	}
-};
-
-/*
- * SHA1 test vectors  from from FIPS PUB 180-1
- * Long vector from CAVS 5.0
- */
-static const struct hash_testvec sha1_tv_template[] = {
-	{
-		.plaintext = "",
-		.psize	= 0,
-		.digest	= "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55"
-			  "\xbf\xef\x95\x60\x18\x90\xaf\xd8\x07\x09",
-	}, {
-		.plaintext = "abc",
-		.psize	= 3,
-		.digest	= "\xa9\x99\x3e\x36\x47\x06\x81\x6a\xba\x3e"
-			  "\x25\x71\x78\x50\xc2\x6c\x9c\xd0\xd8\x9d",
-	}, {
-		.plaintext = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
-		.psize	= 56,
-		.digest	= "\x84\x98\x3e\x44\x1c\x3b\xd2\x6e\xba\xae"
-			  "\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1",
-		.np	= 2,
-		.tap	= { 28, 28 }
-	}, {
-		.plaintext = "\xec\x29\x56\x12\x44\xed\xe7\x06"
-			     "\xb6\xeb\x30\xa1\xc3\x71\xd7\x44"
-			     "\x50\xa1\x05\xc3\xf9\x73\x5f\x7f"
-			     "\xa9\xfe\x38\xcf\x67\xf3\x04\xa5"
-			     "\x73\x6a\x10\x6e\x92\xe1\x71\x39"
-			     "\xa6\x81\x3b\x1c\x81\xa4\xf3\xd3"
-			     "\xfb\x95\x46\xab\x42\x96\xfa\x9f"
-			     "\x72\x28\x26\xc0\x66\x86\x9e\xda"
-			     "\xcd\x73\xb2\x54\x80\x35\x18\x58"
-			     "\x13\xe2\x26\x34\xa9\xda\x44\x00"
-			     "\x0d\x95\xa2\x81\xff\x9f\x26\x4e"
-			     "\xcc\xe0\xa9\x31\x22\x21\x62\xd0"
-			     "\x21\xcc\xa2\x8d\xb5\xf3\xc2\xaa"
-			     "\x24\x94\x5a\xb1\xe3\x1c\xb4\x13"
-			     "\xae\x29\x81\x0f\xd7\x94\xca\xd5"
-			     "\xdf\xaf\x29\xec\x43\xcb\x38\xd1"
-			     "\x98\xfe\x4a\xe1\xda\x23\x59\x78"
-			     "\x02\x21\x40\x5b\xd6\x71\x2a\x53"
-			     "\x05\xda\x4b\x1b\x73\x7f\xce\x7c"
-			     "\xd2\x1c\x0e\xb7\x72\x8d\x08\x23"
-			     "\x5a\x90\x11",
-		.psize	= 163,
-		.digest	= "\x97\x01\x11\xc4\xe7\x7b\xcc\x88\xcc\x20"
-			  "\x45\x9c\x02\xb6\x9b\x4a\xa8\xf5\x82\x17",
-		.np	= 4,
-		.tap	= { 63, 64, 31, 5 }
-	}, {
-		.plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-",
-		.psize	= 64,
-		.digest = "\xc8\x71\xf6\x9a\x63\xcc\xa9\x84\x84\x82"
-			  "\x64\xe7\x79\x95\x5d\xd7\x19\x41\x7c\x91",
 	}, {
 	}, {
-		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
-			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
-			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
-			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
-			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
-			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
-			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
-			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
-			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
-			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
-			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
-			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
-			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
-			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
-			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
-			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
-			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
-			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
-			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
-			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
-			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
-			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
-			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
-			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
-			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
-			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
-			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
-			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
-			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
-			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
-			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
-			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
-			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
-			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
-			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
-			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
-			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
-			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
-			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
-			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
-			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
-			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
-			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
-			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
-			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
-			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
-			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
-			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
-			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
-			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
-			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
-			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
-			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
-			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
-			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
-			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
-			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
-			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
-			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
-			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
-			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
-			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
-			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
-			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
-			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
-			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
-			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
-			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
-			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
-			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
-			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
-			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
-			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
-			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
-			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
-			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
-			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
-			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
-			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
-			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
-			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
-			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
-			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
-			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
-			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
-			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
-			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
-			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
-			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
-			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
-			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
-			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
-			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
-			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
-			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
-			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
-			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
-			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
-			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
-			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
-			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
-			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
-			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
-			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
-			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
-			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
-			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
-			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
-			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
-			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
-			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
-			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
-			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
-			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
-			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
-			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
-			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
-			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
-			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
-			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
-			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
-			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
-			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
-			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
-			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
-			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
-			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
-			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
-		.psize     = 1023,
-		.digest    = "\xb8\xe3\x54\xed\xc5\xfc\xef\xa4"
-			     "\x55\x73\x4a\x81\x99\xe4\x47\x2a"
-			     "\x30\xd6\xc9\x85",
+		.plaintext =	"\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
+				"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
+				"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
+				"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
+				"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
+				"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
+				"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
+				"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
+				"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
+				"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
+				"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
+				"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
+				"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
+				"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
+				"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
+				"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
+				"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
+				"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
+				"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
+				"\x58\xef\x63\xfa\x91\x05\x9c\x33"
+				"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
+				"\x19\xb0\x47\xde\x52\xe9\x80\x17"
+				"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
+				"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
+				"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
+				"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
+				"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
+				"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
+				"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
+				"\x86\x1d\x91\x28\xbf\x33\xca\x61"
+				"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
+				"\x47\xde\x75\x0c\x80\x17\xae\x22"
+				"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
+				"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
+				"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
+				"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
+				"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
+				"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
+				"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
+				"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
+				"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
+				"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
+				"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
+				"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
+				"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
+				"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
+				"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
+				"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
+				"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
+				"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
+				"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
+				"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
+				"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
+				"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
+				"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
+				"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
+				"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
+				"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
+				"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
+				"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
+				"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
+				"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
+				"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
+				"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
+				"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
+				"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
+				"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
+				"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
+				"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
+				"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
+				"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
+				"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
+				"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
+				"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
+				"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
+				"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
+				"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
+				"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
+				"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
+				"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
+				"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
+				"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
+				"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
+				"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
+				"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
+				"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
+				"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
+				"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
+				"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
+				"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
+				"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
+				"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
+				"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
+				"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
+				"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
+				"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
+				"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
+				"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
+				"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
+				"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
+				"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
+				"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
+				"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
+				"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
+				"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
+				"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
+				"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
+				"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
+				"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
+				"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
+				"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
+				"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
+				"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
+				"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
+				"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
+				"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
+				"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
+				"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
+				"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
+				"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
+				"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
+				"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
+				"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
+				"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
+				"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
+				"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
+				"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
+				"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
+				"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
+				"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
+				"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
+				"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
+				"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
+				"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
+				"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
+				"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
+				"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
+				"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
+				"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
+				"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
+				"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
+				"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
+				"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
+				"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
+				"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
+				"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
+				"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
+				"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
+				"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
+				"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
+				"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
+				"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
+				"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
+				"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
+				"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
+				"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
+				"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
+				"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
+				"\x47\xde\x52\xe9\x80\x17\x8b\x22"
+				"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
+				"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
+				"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
+				"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
+				"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
+				"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
+				"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
+				"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
+				"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
+				"\x75\x0c\x80\x17\xae\x22\xb9\x50"
+				"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
+				"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
+				"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
+				"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
+				"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
+				"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
+				"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
+				"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
+				"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
+				"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
+				"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
+				"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
+				"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
+				"\x48\xdf\x53\xea\x81\x18\x8c\x23"
+				"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
+				"\x09\xa0\x37\xce\x42\xd9\x70\x07"
+				"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
+				"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
+				"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
+				"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
+				"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
+				"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
+				"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
+				"\x76\x0d\x81\x18\xaf\x23\xba\x51"
+				"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
+				"\x37\xce\x65\xfc\x70\x07\x9e\x12"
+				"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
+				"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
+				"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
+				"\xff\x73\x0a\xa1\x15\xac\x43\xda"
+				"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
+				"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
+				"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
+				"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
+				"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
+				"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
+				"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
+				"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
+				"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
+				"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
+				"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
+				"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
+				"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
+				"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
+				"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
+				"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
+				"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
+				"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
+				"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
+				"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
+				"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
+				"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
+				"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
+				"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
+				"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
+				"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
+				"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
+				"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
+				"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
+				"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
+				"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
+				"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
+				"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
+				"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
+				"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
+				"\xef\x86\x1d\x91\x28\xbf\x33\xca"
+				"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
+				"\xd3\x47\xde\x75\x0c\x80\x17\xae"
+				"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
+				"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
+				"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
+				"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
+				"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
+				"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
+				"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
+				"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
+				"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
+				"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
+				"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
+				"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
+				"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
+				"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
+				"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
+				"\x67\xfe\x95\x09\xa0\x37\xce\x42"
+				"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
+				"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
+				"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
+		.psize = 2048,
+		.digest		= (u8 *)(u16 []){ 0x23ca },
+	}
+};
+
+/* Example vectors below taken from
+ * http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
+ *
+ * The rest taken from
+ * https://github.com/adamws/oscca-sm3
+ */
+static const struct hash_testvec sm3_tv_template[] = {
+	{
+		.plaintext = "",
+		.psize = 0,
+		.digest = (u8 *)(u8 []) {
+			0x1A, 0xB2, 0x1D, 0x83, 0x55, 0xCF, 0xA1, 0x7F,
+			0x8e, 0x61, 0x19, 0x48, 0x31, 0xE8, 0x1A, 0x8F,
+			0x22, 0xBE, 0xC8, 0xC7, 0x28, 0xFE, 0xFB, 0x74,
+			0x7E, 0xD0, 0x35, 0xEB, 0x50, 0x82, 0xAA, 0x2B }
+	}, {
+		.plaintext = "a",
+		.psize = 1,
+		.digest = (u8 *)(u8 []) {
+			0x62, 0x34, 0x76, 0xAC, 0x18, 0xF6, 0x5A, 0x29,
+			0x09, 0xE4, 0x3C, 0x7F, 0xEC, 0x61, 0xB4, 0x9C,
+			0x7E, 0x76, 0x4A, 0x91, 0xA1, 0x8C, 0xCB, 0x82,
+			0xF1, 0x91, 0x7A, 0x29, 0xC8, 0x6C, 0x5E, 0x88 }
+	}, {
+		/* A.1. Example 1 */
+		.plaintext = "abc",
+		.psize = 3,
+		.digest = (u8 *)(u8 []) {
+			0x66, 0xC7, 0xF0, 0xF4, 0x62, 0xEE, 0xED, 0xD9,
+			0xD1, 0xF2, 0xD4, 0x6B, 0xDC, 0x10, 0xE4, 0xE2,
+			0x41, 0x67, 0xC4, 0x87, 0x5C, 0xF2, 0xF7, 0xA2,
+			0x29, 0x7D, 0xA0, 0x2B, 0x8F, 0x4B, 0xA8, 0xE0 }
+	}, {
+		.plaintext = "abcdefghijklmnopqrstuvwxyz",
+		.psize = 26,
+		.digest = (u8 *)(u8 []) {
+			0xB8, 0x0F, 0xE9, 0x7A, 0x4D, 0xA2, 0x4A, 0xFC,
+			0x27, 0x75, 0x64, 0xF6, 0x6A, 0x35, 0x9E, 0xF4,
+			0x40, 0x46, 0x2A, 0xD2, 0x8D, 0xCC, 0x6D, 0x63,
+			0xAD, 0xB2, 0x4D, 0x5C, 0x20, 0xA6, 0x15, 0x95 }
+	}, {
+		/* A.1. Example 2 */
+		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdab"
+			     "cdabcdabcdabcdabcd",
+		.psize = 64,
+		.digest = (u8 *)(u8 []) {
+			0xDE, 0xBE, 0x9F, 0xF9, 0x22, 0x75, 0xB8, 0xA1,
+			0x38, 0x60, 0x48, 0x89, 0xC1, 0x8E, 0x5A, 0x4D,
+			0x6F, 0xDB, 0x70, 0xE5, 0x38, 0x7E, 0x57, 0x65,
+			0x29, 0x3D, 0xCB, 0xA3, 0x9C, 0x0C, 0x57, 0x32 }
+	}, {
+		.plaintext = "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+			     "abcdabcdabcdabcdabcdabcdabcdabcd",
+		.psize = 256,
+		.digest = (u8 *)(u8 []) {
+			0xB9, 0x65, 0x76, 0x4C, 0x8B, 0xEB, 0xB0, 0x91,
+			0xC7, 0x60, 0x2B, 0x74, 0xAF, 0xD3, 0x4E, 0xEF,
+			0xB5, 0x31, 0xDC, 0xCB, 0x4E, 0x00, 0x76, 0xD9,
+			0xB7, 0xCD, 0x81, 0x31, 0x99, 0xB4, 0x59, 0x71 }
+	}
+};
+
+/*
+ * SHA1 test vectors  from from FIPS PUB 180-1
+ * Long vector from CAVS 5.0
+ */
+static const struct hash_testvec sha1_tv_template[] = {
+	{
+		.plaintext = "",
+		.psize	= 0,
+		.digest	= "\xda\x39\xa3\xee\x5e\x6b\x4b\x0d\x32\x55"
+			  "\xbf\xef\x95\x60\x18\x90\xaf\xd8\x07\x09",
+	}, {
+		.plaintext = "abc",
+		.psize	= 3,
+		.digest	= "\xa9\x99\x3e\x36\x47\x06\x81\x6a\xba\x3e"
+			  "\x25\x71\x78\x50\xc2\x6c\x9c\xd0\xd8\x9d",
+	}, {
+		.plaintext = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+		.psize	= 56,
+		.digest	= "\x84\x98\x3e\x44\x1c\x3b\xd2\x6e\xba\xae"
+			  "\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1",
+		.np	= 2,
+		.tap	= { 28, 28 }
+	}, {
+		.plaintext = "\xec\x29\x56\x12\x44\xed\xe7\x06"
+			     "\xb6\xeb\x30\xa1\xc3\x71\xd7\x44"
+			     "\x50\xa1\x05\xc3\xf9\x73\x5f\x7f"
+			     "\xa9\xfe\x38\xcf\x67\xf3\x04\xa5"
+			     "\x73\x6a\x10\x6e\x92\xe1\x71\x39"
+			     "\xa6\x81\x3b\x1c\x81\xa4\xf3\xd3"
+			     "\xfb\x95\x46\xab\x42\x96\xfa\x9f"
+			     "\x72\x28\x26\xc0\x66\x86\x9e\xda"
+			     "\xcd\x73\xb2\x54\x80\x35\x18\x58"
+			     "\x13\xe2\x26\x34\xa9\xda\x44\x00"
+			     "\x0d\x95\xa2\x81\xff\x9f\x26\x4e"
+			     "\xcc\xe0\xa9\x31\x22\x21\x62\xd0"
+			     "\x21\xcc\xa2\x8d\xb5\xf3\xc2\xaa"
+			     "\x24\x94\x5a\xb1\xe3\x1c\xb4\x13"
+			     "\xae\x29\x81\x0f\xd7\x94\xca\xd5"
+			     "\xdf\xaf\x29\xec\x43\xcb\x38\xd1"
+			     "\x98\xfe\x4a\xe1\xda\x23\x59\x78"
+			     "\x02\x21\x40\x5b\xd6\x71\x2a\x53"
+			     "\x05\xda\x4b\x1b\x73\x7f\xce\x7c"
+			     "\xd2\x1c\x0e\xb7\x72\x8d\x08\x23"
+			     "\x5a\x90\x11",
+		.psize	= 163,
+		.digest	= "\x97\x01\x11\xc4\xe7\x7b\xcc\x88\xcc\x20"
+			  "\x45\x9c\x02\xb6\x9b\x4a\xa8\xf5\x82\x17",
+		.np	= 4,
+		.tap	= { 63, 64, 31, 5 }
+	}, {
+		.plaintext = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-",
+		.psize	= 64,
+		.digest = "\xc8\x71\xf6\x9a\x63\xcc\xa9\x84\x84\x82"
+			  "\x64\xe7\x79\x95\x5d\xd7\x19\x41\x7c\x91",
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\xb8\xe3\x54\xed\xc5\xfc\xef\xa4"
+			     "\x55\x73\x4a\x81\x99\xe4\x47\x2a"
+			     "\x30\xd6\xc9\x85",
 	}
 };
 
@@ -12303,23 +12562,296 @@ static const struct cipher_testvec serpent_cbc_dec_tv_template[] = {
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 496 - 20, 4, 16 },
+	},
+};
+
+static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
+	{ /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
+			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
+			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
+			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
+			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
+			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.ilen	= 496,
+		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
+			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
+			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
+			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
+			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
+			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
+			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
+			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
+			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
+			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
+			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
+			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
+			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
+			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
+			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
+			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
+			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
+			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
+			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
+			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
+			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
+			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
+			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
+			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
+			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
+			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
+			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
+			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
+			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
+			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
+			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
+			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
+			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
+			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
+			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
+			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
+			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
+			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
+			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
+			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
+			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
+			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
+			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
+			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
+			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
+			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
+			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
+			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
+			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
+			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
+			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
+			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
+			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
+			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
+			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
+			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
+			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
+			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
+			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
+			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
+			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
+		.rlen	= 496,
+	}, { /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
+			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
+			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
+			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
+			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
+			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
+			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
+			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
+			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
+			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
+			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
+			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
+			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
+			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
+			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
+			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
+			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
+			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
+			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
+			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
+			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
+			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
+			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
+			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
+			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
+			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
+			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
+			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
+			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
+			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
+			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
+			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
+			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
+			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
+			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
+			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
+			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
+			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
+			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
+			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
+			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
+			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
+			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
+			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
+			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
+			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
+			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
+			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
+			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
+			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
+			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
+			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
+			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
+			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
+			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
+			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
+			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
+			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
+			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
+			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
+			  "\x2B\xC2\x59",
+		.ilen	= 499,
+		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
+			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
+			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
+			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
+			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
+			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
+			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
+			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
+			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
+			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
+			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
+			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
+			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
+			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
+			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
+			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
+			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
+			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
+			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
+			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
+			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
+			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
+			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
+			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
+			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
+			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
+			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
+			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
+			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
+			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
+			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
+			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
+			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
+			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
+			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
+			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
+			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
+			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
+			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
+			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
+			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
+			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
+			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
+			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
+			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
+			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
+			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
+			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
+			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
+			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
+			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
+			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
+			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
+			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
+			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
+			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
+			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
+			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
+			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
+			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
+			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
+			  "\x38\xE2\xE5",
+		.rlen	= 499,
 		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
-	{ /* Generated with Crypto++ */
+		.np	= 2,
+		.tap	= { 499 - 16, 16 },
+	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
 			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
 			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
 		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
@@ -12383,7 +12915,82 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
 		.ilen	= 496,
-		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+		.result	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
+			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
+			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
+			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
+			  "\xA6\xB9\xE4\xF4\xFA\xCA\xE5\x7E"
+			  "\x71\x28\x06\x4F\xE8\x08\x39\xDA"
+			  "\xA5\x0E\xC8\xC0\xB8\x16\xE5\x69"
+			  "\xE5\xCA\xEC\x4F\x63\x2C\xC0\x9B"
+			  "\x9F\x3E\x39\x79\xF0\xCD\x64\x35"
+			  "\x4A\xD3\xC8\xA9\x31\xCD\x48\x5B"
+			  "\x92\x3D\x8F\x3F\x96\xBD\xB3\x18"
+			  "\x74\x2A\x5D\x29\x3F\x57\x8F\xE2"
+			  "\x67\x9A\xE0\xE5\xD4\x4A\xE2\x47"
+			  "\xBC\xF6\xEB\x14\xF3\x8C\x20\xC2"
+			  "\x7D\xE2\x43\x81\x86\x72\x2E\xB1"
+			  "\x39\xF6\x95\xE1\x1F\xCB\x76\x33"
+			  "\x5B\x7D\x23\x0F\x3A\x67\x2A\x2F"
+			  "\xB9\x37\x9D\xDD\x1F\x16\xA1\x3C"
+			  "\x70\xFE\x52\xAA\x93\x3C\xC4\x46"
+			  "\xB1\xE5\xFF\xDA\xAF\xE2\x84\xFE"
+			  "\x25\x92\xB2\x63\xBD\x49\x77\xB4"
+			  "\x22\xA4\x6A\xD5\x04\xE0\x45\x58"
+			  "\x1C\x34\x96\x7C\x03\x0C\x13\xA2"
+			  "\x05\x22\xE2\xCB\x5A\x35\x03\x09"
+			  "\x40\xD2\x82\x05\xCA\x58\x73\xF2"
+			  "\x29\x5E\x01\x47\x13\x32\x78\xBE"
+			  "\x06\xB0\x51\xDB\x6C\x31\xA0\x1C"
+			  "\x74\xBC\x8D\x25\xDF\xF8\x65\xD1"
+			  "\x38\x35\x11\x26\x4A\xB4\x06\x32"
+			  "\xFA\xD2\x07\x77\xB3\x74\x98\x80"
+			  "\x61\x59\xA8\x9F\xF3\x6F\x2A\xBF"
+			  "\xE6\xA5\x9A\xC4\x6B\xA6\x49\x6F"
+			  "\xBC\x47\xD9\xFB\xC6\xEF\x25\x65"
+			  "\x96\xAC\x9F\xE4\x81\x4B\xD8\xBA"
+			  "\xD6\x9B\xC9\x6D\x58\x40\x81\x02"
+			  "\x73\x44\x4E\x43\x6E\x37\xBB\x11"
+			  "\xE3\xF9\xB8\x2F\xEC\x76\x34\xEA"
+			  "\x90\xCD\xB7\x2E\x0E\x32\x71\xE8"
+			  "\xBB\x4E\x0B\x98\xA4\x17\x17\x5B"
+			  "\x07\xB5\x82\x3A\xC4\xE8\x42\x51"
+			  "\x5A\x4C\x4E\x7D\xBF\xC4\xC0\x4F"
+			  "\x68\xB8\xC6\x4A\x32\x6F\x0B\xD7"
+			  "\x85\xED\x6B\xFB\x72\xD2\xA5\x8F"
+			  "\xBF\xF9\xAC\x59\x50\xA8\x08\x70"
+			  "\xEC\xBD\x0A\xBF\xE5\x87\xA1\xC2"
+			  "\x92\x14\x78\xAF\xE8\xEA\x2E\xDD"
+			  "\xC1\x03\x9A\xAA\x89\x8B\x32\x46"
+			  "\x5B\x18\x27\xBA\x46\xAA\x64\xDE"
+			  "\xE3\xD5\xA3\xFC\x7B\x5B\x61\xDB"
+			  "\x7E\xDA\xEC\x30\x17\x19\xF8\x80"
+			  "\xB5\x5E\x27\xB5\x37\x3A\x1F\x28"
+			  "\x07\x73\xC3\x63\xCE\xFF\x8C\xFE"
+			  "\x81\x4E\xF8\x24\xF3\xB8\xC7\xE8"
+			  "\x16\x9A\xCC\x58\x2F\x88\x1C\x4B"
+			  "\xBB\x33\xA2\x73\xF0\x1C\x89\x0E"
+			  "\xDC\x34\x27\x89\x98\xCE\x1C\xA2"
+			  "\xD8\xB8\x90\xBE\xEC\x72\x28\x13"
+			  "\xAC\x7B\xF1\xD0\x7F\x7A\x28\x50"
+			  "\xB7\x99\x65\x8A\xC9\xC6\x21\x34"
+			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
+			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
+			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
+		.rlen	= 496,
+	},
+};
+
+static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
+	{ /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
@@ -12445,16 +13052,8 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
 			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
-		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ilen	= 496,
+		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12515,10 +13114,17 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59",
-		.ilen	= 499,
-		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
+		.rlen	= 496,
+	}, { /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
+			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
+		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
@@ -12581,19 +13187,8 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
 			  "\x38\xE2\xE5",
 			  "\x38\xE2\xE5",
-		.rlen	= 499,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ilen	= 499,
+		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12654,9 +13249,21 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
+			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
+			  "\x2B\xC2\x59",
+		.rlen	= 499,
+		.also_non_np = 1,
+		.np	= 2,
+		.tap	= { 499 - 16, 16 },
+	}, { /* Generated with Crypto++ */
+		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
+			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
+			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
+			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+		.klen	= 32,
+		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
+		.input	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
 			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
 			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
 			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
 			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
 			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
 			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
@@ -12718,81 +13325,6 @@ static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
 			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
 			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
 			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
 			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
 			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
 			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
-		.rlen	= 496,
-	},
-};
-
-static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
-			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
-			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
-			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
-			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
-			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
-			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
-			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
-			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
-			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
-			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
-			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
-			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
-			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
-			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
-			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
-			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
-			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
-			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
-			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
-			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
-			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
-			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
-			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
-			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
-			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
-			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
-			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
-			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
-			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
-			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
-			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
-			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
-			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
-			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
-			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
-			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
-			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
-			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
-			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
-			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
-			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
-			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
-			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
-			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
-			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
-			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
-			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
-			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
-			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
-			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
-			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
-			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
-			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
-			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
-			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
-			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
-			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
-			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
-			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
-			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
-			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
 		.ilen	= 496,
 		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
@@ -12857,795 +13389,2156 @@ static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
 		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
+	},
+};
+
+static const struct cipher_testvec serpent_lrw_enc_tv_template[] = {
+	/* Generated from AES-LRW test vectors */
+	{
+		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
+			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
+			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
+			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
+			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
+		.rlen	= 16,
+	}, {
+		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
+			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
+			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
+			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x02",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
+			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
+			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
+			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
+			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
+			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
+		.rlen	= 16,
+	}, {
+		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
+			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
+			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
+			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
+			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
+			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
+		.rlen	= 16,
+	}, {
+		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
+			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
+			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
+			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
+			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
+			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
+			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
+		.rlen	= 16,
+	}, {
+		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
+			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
+			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
+			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
+			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
+			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.ilen	= 16,
+		.result	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
+			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
+			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
+			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
+			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
+			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
+			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
+			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
+			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
+			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
+			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
+			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
+			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
+			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
+			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
+			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
+			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
+			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
+			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
+			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
+			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
+			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
+			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
+			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
+			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
+			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
+			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
+			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
+			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
+			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
+			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
+			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
+			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
+			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
+			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
+			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
+			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
+			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
+			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
+			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
+			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
+			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
+			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
+			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
+			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
+			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
+			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
+			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
+			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
+			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
+			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
+			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
+			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
+			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
+			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
+			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
+			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
+			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
+			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
+			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
+			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
+			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
+			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
+			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
+		.ilen	= 512,
+		.result	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
+			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
+			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
+			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
+			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
+			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
+			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
+			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
+			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
+			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
+			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
+			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
+			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
+			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
+			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
+			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
+			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
+			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
+			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
+			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
+			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
+			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
+			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
+			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
+			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
+			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
+			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
+			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
+			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
+			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
+			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
+			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
+			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
+			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
+			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
+			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
+			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
+			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
+			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
+			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
+			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
+			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
+			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
+			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
+			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
+			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
+			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
+			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
+			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
+			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
+			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
+			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
+			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
+			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
+			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
+			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
+			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
+			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
+			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
+			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
+			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
+			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
+			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
+			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	},
+};
+
+static const struct cipher_testvec serpent_lrw_dec_tv_template[] = {
+	/* Generated from AES-LRW test vectors */
+	/* same as enc vectors with input and result reversed */
+	{
+		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
+			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
+			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
+			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
+			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
+			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
+			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
+			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x02",
+		.input	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
+			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
+			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
+			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
+			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
+			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
+			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
+			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
+			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
+			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
+			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
+			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
+			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
+			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
+			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
+		.klen	= 40,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
+			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
+			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
+			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
+			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
+			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
+			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
+			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x02\x00\x00\x00\x00",
+		.input	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
+			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
+		.ilen	= 16,
+		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
+			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
+			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
+			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
+			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
+			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
+		.klen	= 48,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.input	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
+			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
+			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
+			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
+			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
+			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
+			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
+			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
+			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
+			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
+			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
+			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
+			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
+			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
+			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
+			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
+			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
+			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
+			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
+			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
+			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
+			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
+			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
+			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
+			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
+			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
+			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
+			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
+			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
+			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
+			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
+			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
+			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
+			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
+			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
+			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
+			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
+			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
+			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
+			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
+			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
+			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
+			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
+			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
+			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
+			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
+			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
+			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
+			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
+			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
+			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
+			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
+			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
+			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
+			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
+			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
+			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
+			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
+			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
+			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
+			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
+			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
+			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
+			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
+		.ilen	= 512,
+		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
+			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
+			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
+			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
+			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
+			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
+			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
+			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
+			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
+			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
+			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
+			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
+			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
+			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
+			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
+			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
+			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
+			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
+			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
+			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
+			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
+			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
+			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
+			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
+			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
+			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
+			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
+			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
+			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
+			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
+			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
+			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
+			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
+			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
+			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
+			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
+			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
+			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
+			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
+			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
+			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
+			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
+			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
+			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
+			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
+			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
+			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
+			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
+			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
+			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
+			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
+			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
+			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
+			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
+			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
+			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
+			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
+			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
+			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
+			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
+			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
+			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
+			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	},
+};
+
+static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
+	/* Generated from AES-XTS test vectors */
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
-			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
-			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
-			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
-			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
-			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
-			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
-			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
-			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
-			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
-			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
-			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
-			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
-			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
-			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
-			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
-			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
-			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
-			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
-			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
-			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
-			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
-			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
-			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
-			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
-			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
-			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
-			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
-			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
-			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
-			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
-			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
-			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
-			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
-			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
-			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
-			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
-			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
-			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
-			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
-			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
-			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
-			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
-			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
-			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
-			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
-			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
-			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
-			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
-			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
-			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
-			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
-			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
-			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
-			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
-			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
-			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
-			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
-			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
-			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
-			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
-			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
-			  "\x38\xE2\xE5",
-		.ilen	= 499,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59",
-		.rlen	= 499,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 32,
+		.result	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
+			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
+			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
+			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.rlen	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
+			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
+			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
+			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
+			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
+			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
+			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
+			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
+			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
+			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
+			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
+			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
+			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
+			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
+			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
+			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
+			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
+			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
+			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
+			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
+			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
+			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
+			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
+			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
+			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
+			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
+			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
+			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
+			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
+			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
+			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
+			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
+			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
+			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
+			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
+			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
+			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
+			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
+			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
+			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
+			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
+			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
+			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
+			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
+			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
+			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
+			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
+			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
+			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
+			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
+			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
+			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
+			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
+			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
+			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
+			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
+			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
+			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
+			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
+			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
+			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
+			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
+			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
+			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
+			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
+			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
+			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
+			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
+			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
+			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+		.rlen	= 512,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
+			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
+			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
+			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
+			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
+			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
+			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
+			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
+			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
+			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
+			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
+			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
+			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
+			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
+			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
+			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
+			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
+			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
+			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
+			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
+			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
+			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
+			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
+			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
+			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
+			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
+			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
+			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
+			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
+			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
+			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
+			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
+			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
+			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
+			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
+			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
+			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
+			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
+			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
+			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
+			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
+			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
+			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
+			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
+			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
+			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
+			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
+			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
+			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
+			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
+			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
+			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
+			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
+			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
+			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
+			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
+			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
+			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
+			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
+			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
+			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
+			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
+			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
+			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.rlen	= 512,
 		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
-			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
-			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
-			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
-			  "\xA6\xB9\xE4\xF4\xFA\xCA\xE5\x7E"
-			  "\x71\x28\x06\x4F\xE8\x08\x39\xDA"
-			  "\xA5\x0E\xC8\xC0\xB8\x16\xE5\x69"
-			  "\xE5\xCA\xEC\x4F\x63\x2C\xC0\x9B"
-			  "\x9F\x3E\x39\x79\xF0\xCD\x64\x35"
-			  "\x4A\xD3\xC8\xA9\x31\xCD\x48\x5B"
-			  "\x92\x3D\x8F\x3F\x96\xBD\xB3\x18"
-			  "\x74\x2A\x5D\x29\x3F\x57\x8F\xE2"
-			  "\x67\x9A\xE0\xE5\xD4\x4A\xE2\x47"
-			  "\xBC\xF6\xEB\x14\xF3\x8C\x20\xC2"
-			  "\x7D\xE2\x43\x81\x86\x72\x2E\xB1"
-			  "\x39\xF6\x95\xE1\x1F\xCB\x76\x33"
-			  "\x5B\x7D\x23\x0F\x3A\x67\x2A\x2F"
-			  "\xB9\x37\x9D\xDD\x1F\x16\xA1\x3C"
-			  "\x70\xFE\x52\xAA\x93\x3C\xC4\x46"
-			  "\xB1\xE5\xFF\xDA\xAF\xE2\x84\xFE"
-			  "\x25\x92\xB2\x63\xBD\x49\x77\xB4"
-			  "\x22\xA4\x6A\xD5\x04\xE0\x45\x58"
-			  "\x1C\x34\x96\x7C\x03\x0C\x13\xA2"
-			  "\x05\x22\xE2\xCB\x5A\x35\x03\x09"
-			  "\x40\xD2\x82\x05\xCA\x58\x73\xF2"
-			  "\x29\x5E\x01\x47\x13\x32\x78\xBE"
-			  "\x06\xB0\x51\xDB\x6C\x31\xA0\x1C"
-			  "\x74\xBC\x8D\x25\xDF\xF8\x65\xD1"
-			  "\x38\x35\x11\x26\x4A\xB4\x06\x32"
-			  "\xFA\xD2\x07\x77\xB3\x74\x98\x80"
-			  "\x61\x59\xA8\x9F\xF3\x6F\x2A\xBF"
-			  "\xE6\xA5\x9A\xC4\x6B\xA6\x49\x6F"
-			  "\xBC\x47\xD9\xFB\xC6\xEF\x25\x65"
-			  "\x96\xAC\x9F\xE4\x81\x4B\xD8\xBA"
-			  "\xD6\x9B\xC9\x6D\x58\x40\x81\x02"
-			  "\x73\x44\x4E\x43\x6E\x37\xBB\x11"
-			  "\xE3\xF9\xB8\x2F\xEC\x76\x34\xEA"
-			  "\x90\xCD\xB7\x2E\x0E\x32\x71\xE8"
-			  "\xBB\x4E\x0B\x98\xA4\x17\x17\x5B"
-			  "\x07\xB5\x82\x3A\xC4\xE8\x42\x51"
-			  "\x5A\x4C\x4E\x7D\xBF\xC4\xC0\x4F"
-			  "\x68\xB8\xC6\x4A\x32\x6F\x0B\xD7"
-			  "\x85\xED\x6B\xFB\x72\xD2\xA5\x8F"
-			  "\xBF\xF9\xAC\x59\x50\xA8\x08\x70"
-			  "\xEC\xBD\x0A\xBF\xE5\x87\xA1\xC2"
-			  "\x92\x14\x78\xAF\xE8\xEA\x2E\xDD"
-			  "\xC1\x03\x9A\xAA\x89\x8B\x32\x46"
-			  "\x5B\x18\x27\xBA\x46\xAA\x64\xDE"
-			  "\xE3\xD5\xA3\xFC\x7B\x5B\x61\xDB"
-			  "\x7E\xDA\xEC\x30\x17\x19\xF8\x80"
-			  "\xB5\x5E\x27\xB5\x37\x3A\x1F\x28"
-			  "\x07\x73\xC3\x63\xCE\xFF\x8C\xFE"
-			  "\x81\x4E\xF8\x24\xF3\xB8\xC7\xE8"
-			  "\x16\x9A\xCC\x58\x2F\x88\x1C\x4B"
-			  "\xBB\x33\xA2\x73\xF0\x1C\x89\x0E"
-			  "\xDC\x34\x27\x89\x98\xCE\x1C\xA2"
-			  "\xD8\xB8\x90\xBE\xEC\x72\x28\x13"
-			  "\xAC\x7B\xF1\xD0\x7F\x7A\x28\x50"
-			  "\xB7\x99\x65\x8A\xC9\xC6\x21\x34"
-			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
-			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
-			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec serpent_lrw_enc_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
+static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
+	/* Generated from AES-XTS test vectors */
+	/* same as enc vectors with input and result reversed */
 	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
-			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
+			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
+			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
+			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.ilen	= 32,
+		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.rlen	= 32,
 	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
 		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
-			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
-		.rlen	= 16,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
+			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
+			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
+			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
 	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
+			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
+			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
+			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
-			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
+			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
+			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
+			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
+			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
+			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
+			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
+			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
+			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
+			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
+			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
+			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
+			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
+			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
+			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
+			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
+			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
+			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
+			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
+			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
+			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
+			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
+			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
+			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
+			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
+			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
+			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
+			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
+			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
+			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
+			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
+			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
+			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
+			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
+			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
+			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
+			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
+			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
+			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
+			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
+			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
+			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
+			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
+			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
+			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
+			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
+			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
+			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
+			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
+			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
+			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
+			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
+			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
+			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
+			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
+			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
+			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
+			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
+			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
+			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
+			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
+			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
+			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
+			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
 	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
+			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
+			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
+			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
+			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
+			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
+			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
+			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
+			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
+			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
+			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
+			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
+			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
+			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
+			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
+			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
+			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
+			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
+			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
+			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
+			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
+			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
+			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
+			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
+			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
+			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
+			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
+			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
+			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
+			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
+			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
+			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
+			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
+			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
+			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
+			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
+			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
+			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
+			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
+			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
+			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
+			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
+			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
+			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
+			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
+			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
+			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
+			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
+			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
+			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
+			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
+			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
+			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
+			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
+			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
+			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
+			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
+			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
+			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
+			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
+			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
+			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
+			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
+			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	},
+};
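The serpent XTS entries above reuse the key/IV/plaintext sets of the AES-XTS test vectors, re-encrypted with Serpent. As a hedged illustration (not part of the patch; the helper name and the trimmed error handling are mine), one .result can be regenerated through the kernel's skcipher API against the "xts(serpent)" transform that these vectors exercise:

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/err.h>

static int serpent_xts_regen_sketch(u8 *buf, unsigned int len,
				    const u8 *key, unsigned int klen,
				    u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* Same transform the test manager exercises for these vectors */
	tfm = crypto_alloc_skcipher("xts(serpent)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, klen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* Encrypt in place: buf holds .input on entry, .result on return */
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

Feeding a vector's .key, .iv and .input through this helper should reproduce the matching .result; buf has to live in linear kernel memory (not on the stack), since it is mapped through a scatterlist.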
+
+/*
+ * SM4 test vector taken from the draft RFC
+ * https://tools.ietf.org/html/draft-crypto-sm4-00#ref-GBT.32907-2016
+ */
+
+static const struct cipher_testvec sm4_enc_tv_template[] = {
+	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.ilen	= 16,
-		.result	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
-			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
+		.result	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
 		.rlen	= 16,
-	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+	}, { /*
+	      *  SM4 Appendix A: Example Calculations.
+	      *  Last 10 iterations of Example 2.
+	      */
+		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+			  "\x81\xfc\xa8\x0e\x38\x3e\xef\x80"
+			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
+		.ilen	= 160,
+		.result	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
+			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
+			  "\x04\x02\xf8\x04\xc3\x3d\x3f\x66",
+		.rlen	= 160
+	}
+};
+
+static const struct cipher_testvec sm4_dec_tv_template[] = {
+	{ /* SM4 Appendix A: Example Calculations. Example 1. */
+		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
 		.ilen	= 16,
-		.result	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
-			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
+		.result	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+	}, { /*
+	      *  SM4 Appendix A: Example Calculations.
+	      *  Last 10 iterations of Example 2.
+	      */
+		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
+		.klen	= 16,
+		.input	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
+			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
+			  "\x04\x02\xf8\x04\xc3\x3d\x3f\x66",
+		.ilen	= 160,
+		.result	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+			  "\x81\xfc\xa8\x0e\x38\x3e\xef\x80"
+			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+			  "\xf0\xf1\x30\x4c\x01\x27\x5a\x8f"
+			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
+			  "\xad\x57\x15\xab\x31\x5d\x0c\xef"
+			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
+			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
+			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
+			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
+			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
+			  "\xa5\x37\x00\xbe\xe7\x7b\x48\xfb"
+			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
+			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
+			  "\x88\xa6\x6e\x06\x93\xca\x43\xa5"
+			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
+			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
+			  "\xed\xce\x00\x19\x0e\x16\x02\x6e"
+			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
+			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
+		.rlen	= 160
+	}
+};
+
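A detail worth noting about the 160-byte SM4 vectors above: each 16-byte block of .input is the SM4 encryption of the block before it, so .result is simply .input shifted along by one block with the final iteration's value appended. The sketch below is illustrative only and is not part of this patch; sm4_encrypt_block() stands in for any assumed single-block SM4 primitive.

#include <string.h>

/* Assumed single-block SM4 primitive: 16-byte input, 16-byte output. */
void sm4_encrypt_block(const unsigned char key[16],
		       const unsigned char in[16], unsigned char out[16]);

/* Regenerate ten chained iterations of Example 2 from a starting block. */
static void sm4_example2_tail(const unsigned char key[16],
			      const unsigned char start[16],
			      unsigned char input[160],
			      unsigned char result[160])
{
	int i;

	/* The first input block is the output of the preceding iteration. */
	memcpy(input, start, 16);

	for (i = 0; i < 10; i++) {
		/* result block i is the encryption of input block i ... */
		sm4_encrypt_block(key, input + 16 * i, result + 16 * i);
		/* ... and also becomes input block i + 1. */
		if (i < 9)
			memcpy(input + 16 * (i + 1), result + 16 * i, 16);
	}
}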
+/*
+ * Speck test vectors taken from the original paper:
+ * "The Simon and Speck Families of Lightweight Block Ciphers"
+ * https://eprint.iacr.org/2013/404.pdf
+ *
+ * Note that the paper does not make byte and word order clear.  But it was
+ * confirmed with the authors that the intended orders are little endian byte
+ * order and (y, x) word order.  Equivalently, the printed test vectors, when
+ * looking at only the bytes (ignoring the whitespace that divides them into
+ * words), are backwards: the left-most byte is actually the one with the
+ * highest memory address, while the right-most byte is actually the one with
+ * the lowest memory address.
+ */
+
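To make the byte-order note above concrete: reversing the byte sequence printed in the paper yields the little-endian, (y, x)-ordered string used by the templates below. A minimal, self-contained sketch (illustrative only, not part of this patch) for the Speck128/128 ciphertext:

#include <stdio.h>

int main(void)
{
	/*
	 * Speck128/128 ciphertext as printed in the paper, read left to
	 * right while ignoring the whitespace between the two words.
	 */
	static const unsigned char printed[16] = {
		0xa6, 0x5d, 0x98, 0x51, 0x79, 0x78, 0x32, 0x65,
		0x78, 0x60, 0xfe, 0xdf, 0x5c, 0x57, 0x0d, 0x18,
	};
	int i;

	/* Emit the bytes backwards: little-endian bytes, (y, x) word order. */
	for (i = 15; i >= 0; i--)
		printf("\\x%02x", printed[i]);
	printf("\n");	/* matches .result of the Speck128/128 entry below */

	return 0;
}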
+static const struct cipher_testvec speck128_enc_tv_template[] = {
+	{ /* Speck128/128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.input	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
 		.ilen	= 16,
-		.result	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
-			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
+		.result	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
 		.rlen	= 16,
-	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+	}, { /* Speck128/192 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17",
+		.klen	= 24,
+		.input	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+			  "\x68\x69\x65\x66\x20\x48\x61\x72",
 		.ilen	= 16,
-		.result	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
-			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
+		.result	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
+		.rlen	= 16,
+	}, { /* Speck128/256 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.klen	= 32,
+		.input	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
+		.ilen	= 16,
+		.result	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
 		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen	= 512,
-		.result	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
-			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
-			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
-			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
-			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
-			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
-			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
-			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
-			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
-			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
-			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
-			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
-			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
-			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
-			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
-			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
-			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
-			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
-			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
-			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
-			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
-			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
-			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
-			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
-			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
-			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
-			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
-			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
-			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
-			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
-			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
-			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
-			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
-			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
-			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
-			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
-			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
-			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
-			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
-			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
-			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
-			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
-			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
-			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
-			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
-			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
-			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
-			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
-			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
-			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
-			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
-			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
-			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
-			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
-			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
-			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
-			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
-			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
-			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
-			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
-			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
-			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
-			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
-			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
 	},
 };

-static const struct cipher_testvec serpent_lrw_dec_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
+static const struct cipher_testvec speck128_dec_tv_template[] = {
+	{ /* Speck128/128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.input	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
+		.ilen	= 16,
+		.result	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
+		.rlen	= 16,
+	}, { /* Speck128/192 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17",
+		.klen	= 24,
+		.input	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
+		.ilen	= 16,
+		.result	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+			  "\x68\x69\x65\x66\x20\x48\x61\x72",
+		.rlen	= 16,
+	}, { /* Speck128/256 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
-			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
+		.input	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
 		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
+		.result	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
 		.rlen	= 16,
+	},
+};
+
+/*
+ * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
+ * result recomputed with Speck128 as the cipher
+ */
+
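For readers wondering what the recomputation involves, the sketch below spells out plain XTS over a 16-byte block cipher: the IV is encrypted under the second half of the key to form the tweak, each block is processed xor-encrypt-xor, and the tweak is multiplied by x in GF(2^128) between blocks. This is only an illustration under stated assumptions; speck128_encrypt() stands in for any single-block Speck128 implementation and is not an API added by this patch.

#include <stddef.h>

/* Assumed single-block Speck128 primitive (round keys derived from key). */
void speck128_encrypt(const unsigned char *key, size_t keylen,
		      const unsigned char in[16], unsigned char out[16]);

/* Multiply the tweak by x in GF(2^128), XTS little-endian convention. */
static void xts_mul_x(unsigned char t[16])
{
	unsigned char carry = 0;
	int i;

	for (i = 0; i < 16; i++) {
		unsigned char next = t[i] >> 7;

		t[i] = (unsigned char)((t[i] << 1) | carry);
		carry = next;
	}
	if (carry)
		t[0] ^= 0x87;
}

/* Recompute an XTS ciphertext for a whole number of 16-byte blocks. */
static void xts_encrypt(const unsigned char *key, size_t keylen,
			const unsigned char iv[16],
			const unsigned char *src, unsigned char *dst,
			size_t len)
{
	unsigned char tweak[16];
	size_t i, j;

	/* Tweak = E_K2(IV); K2 is the second half of the XTS key. */
	speck128_encrypt(key + keylen / 2, keylen / 2, iv, tweak);

	for (i = 0; i < len; i += 16) {
		unsigned char buf[16];

		for (j = 0; j < 16; j++)
			buf[j] = src[i + j] ^ tweak[j];
		speck128_encrypt(key, keylen / 2, buf, buf);	/* E_K1 */
		for (j = 0; j < 16; j++)
			dst[i + j] = buf[j] ^ tweak[j];

		xts_mul_x(tweak);
	}
}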
+static const struct cipher_testvec speck128_xts_enc_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 32,
+		.result	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
+			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
+			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
+		.rlen	= 32,
 	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
+			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
+			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.rlen	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ilen	= 32,
+		.result	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
+			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
+			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+		.rlen	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
-			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
+			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
+			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
+			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
+			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
+			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
+			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
+			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
+			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
+			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
+			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
+			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
+			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
+			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
+			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
+			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
+			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
+			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
+			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
+			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
+			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
+			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
+			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
+			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
+			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
+			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
+			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
+			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
+			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
+			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
+			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
+			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
+			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
+			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
+			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
+			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
+			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
+			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
+			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
+			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
+			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
+			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
+			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
+			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
+			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
+			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
+			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
+			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
+			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
+			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
+			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
+			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
+			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
+			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
+			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
+			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
+			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
+			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
+			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
+			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
+			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
+			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
+			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
+		.rlen	= 512,
 	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
+			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
+			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
+			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
+			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
+			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
+			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
+			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
+			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
+			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
+			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
+			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
+			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
+			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
+			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
+			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
+			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
+			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
+			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
+			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
+			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
+			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
+			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
+			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
+			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
+			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
+			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
+			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
+			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
+			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
+			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
+			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
+			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
+			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
+			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
+			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
+			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
+			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
+			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
+			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
+			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
+			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
+			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
+			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
+			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
+			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
+			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
+			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
+			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
+			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
+			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
+			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
+			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
+			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
+			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
+			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
+			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
+			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
+			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
+			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
+			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
+			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
+			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
+		.rlen	= 512,
+		.also_non_np = 1,
+		.np	= 3,
+		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck128_xts_dec_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
-			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
-			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
+			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
+			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
+		.ilen	= 32,
+		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.rlen	= 32,
 	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
-			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
+			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
+			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
 	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
-			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
+			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
+			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
+		.ilen	= 32,
+		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.rlen	= 32,
 	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
-			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
+			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
+			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
+			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
+			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
+			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
+			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
+			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
+			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
+			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
+			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
+			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
+			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
+			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
+			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
+			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
+			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
+			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
+			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
+			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
+			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
+			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
+			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
+			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
+			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
+			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
+			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
+			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
+			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
+			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
+			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
+			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
+			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
+			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
+			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
+			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
+			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
+			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
+			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
+			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
+			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
+			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
+			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
+			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
+			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
+			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
+			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
+			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
+			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
+			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
+			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
+			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
+			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
+			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
+			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
+			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
+			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
+			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
+			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
+			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
+			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
+			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
+			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
+		.ilen	= 512,
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.rlen	= 512,
 	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
-			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
-			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
-			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
-			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
-			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
-			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
-			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
-			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
-			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
-			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
-			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
-			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
-			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
-			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
-			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
-			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
-			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
-			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
-			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
-			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
-			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
-			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
-			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
-			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
-			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
-			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
-			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
-			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
-			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
-			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
-			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
-			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
-			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
-			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
-			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
-			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
-			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
-			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
-			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
-			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
-			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
-			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
-			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
-			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
-			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
-			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
-			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
-			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
-			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
-			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
-			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
-			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
-			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
-			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
-			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
-			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
-			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
-			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
-			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
-			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
-			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
-			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
-			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 64,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
+			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
+			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
+			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
+			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
+			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
+			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
+			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
+			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
+			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
+			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
+			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
+			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
+			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
+			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
+			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
+			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
+			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
+			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
+			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
+			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
+			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
+			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
+			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
+			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
+			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
+			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
+			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
+			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
+			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
+			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
+			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
+			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
+			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
+			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
+			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
+			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
+			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
+			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
+			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
+			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
+			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
+			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
+			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
+			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
+			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
+			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
+			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
+			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
+			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
+			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
+			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
+			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
+			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
+			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
+			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
+			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
+			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
+			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
+			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
+			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
+			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
+			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
 		.ilen	= 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
+		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
 		.rlen	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
+	}
+};
+
+static const struct cipher_testvec speck64_enc_tv_template[] = {
+	{ /* Speck64/96 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13",
+		.klen	= 12,
+		.input	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.ilen	= 8,
+		.result	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.rlen	= 8,
+	}, { /* Speck64/128 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+		.klen	= 16,
+		.input	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.ilen	= 8,
+		.result	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.rlen	= 8,
 	},
 };

-static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
+static const struct cipher_testvec speck64_dec_tv_template[] = {
+	{ /* Speck64/96 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13",
+		.klen	= 12,
+		.input	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.ilen	= 8,
+		.result	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.rlen	= 8,
+	}, { /* Speck64/128 */
+		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
+			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
+		.klen	= 16,
+		.input	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.ilen	= 8,
+		.result	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.rlen	= 8,
+	},
+};
+
+/*
+ * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result
+ * recomputed with Speck64 as the cipher, and key lengths adjusted
+ */
+
+static const struct cipher_testvec speck64_xts_enc_tv_template[] = {
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -13653,17 +15546,16 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.ilen	= 32,
-		.result	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
-			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
-			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
-			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.result	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
+			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
+			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
 		.rlen	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -13671,17 +15563,16 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
 		.ilen	= 32,
-		.result	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
-			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
-			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
-			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.result	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
+			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
+			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
 		.rlen	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -13689,17 +15580,16 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
 		.ilen	= 32,
-		.result	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
-			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
-			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
-			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.result	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
+			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
+			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
 		.rlen	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
+			  "\x31\x41\x59\x26\x53\x58\x97\x93",
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
@@ -13749,99 +15639,95 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\x60\x61\x62\x63\x64\x65\x66\x67"
 			  "\x60\x61\x62\x63\x64\x65\x66\x67"
 			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
 			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
 			  "\x70\x71\x72\x73\x74\x75\x76\x77"
 			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
-			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
-			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
-			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
-			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
-			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
-			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
-			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
-			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
-			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
-			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
-			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
-			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
-			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
-			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
-			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
-			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
-			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
-			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
-			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
-			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
-			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
-			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
-			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
-			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
-			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
-			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
-			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
-			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
-			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
-			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
-			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
-			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
-			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
-			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
-			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
-			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
-			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
-			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
-			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
-			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
-			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
-			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
-			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
-			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
-			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
-			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
-			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
-			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
-			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
-			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
-			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
-			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
-			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
-			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
-			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
-			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
-			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
-			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
-			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
-			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
-			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
-			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
-			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ilen	= 512,
+		.result	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
+			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
+			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
+			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
+			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
+			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
+			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
+			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
+			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
+			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
+			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
+			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
+			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
+			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
+			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
+			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
+			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
+			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
+			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
+			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
+			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
+			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
+			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
+			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
+			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
+			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
+			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
+			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
+			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
+			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
+			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
+			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
+			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
+			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
+			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
+			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
+			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
+			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
+			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
+			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
+			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
+			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
+			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
+			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
+			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
+			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
+			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
+			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
+			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
+			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
+			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
+			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
+			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
+			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
+			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
+			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
+			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
+			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
+			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
+			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
+			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
+			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
+			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
 		.rlen	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
 			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
+			  "\x99\x59\x57\x49\x66\x96\x76\x27",
+		.klen	= 32,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
@@ -13909,92 +15795,89 @@ static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
 		.ilen	= 512,
-		.result	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
-			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
-			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
-			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
-			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
-			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
-			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
-			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
-			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
-			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
-			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
-			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
-			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
-			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
-			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
-			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
-			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
-			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
-			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
-			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
-			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
-			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
-			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
-			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
-			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
-			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
-			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
-			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
-			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
-			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
-			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
-			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
-			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
-			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
-			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
-			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
-			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
-			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
-			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
-			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
-			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
-			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
-			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
-			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
-			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
-			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
-			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
-			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
-			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
-			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
-			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
-			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
-			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
-			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
-			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
-			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
-			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
-			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
-			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
-			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
-			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
-			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
-			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
-			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.result	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
+			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
+			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
+			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
+			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
+			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
+			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
+			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
+			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
+			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
+			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
+			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
+			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
+			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
+			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
+			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
+			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
+			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
+			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
+			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
+			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
+			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
+			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
+			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
+			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
+			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
+			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
+			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
+			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
+			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
+			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
+			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
+			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
+			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
+			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
+			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
+			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
+			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
+			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
+			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
+			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
+			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
+			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
+			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
+			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
+			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
+			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
+			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
+			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
+			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
+			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
+			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
+			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
+			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
+			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
+			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
+			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
+			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
+			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
+			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
+			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
+			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
+			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
 		.rlen	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
-	},
+	}
 };

-static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
-	/* same as enc vectors with input and result reversed */
+static const struct cipher_testvec speck64_xts_dec_tv_template[] = {
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
-			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
-			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
-			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
+		.input	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
+			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
+			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
 		.ilen	= 32,
 		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -14004,15 +15887,14 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
-			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
-			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
-			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
+		.input	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
+			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
+			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
 		.ilen	= 32,
 		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -14022,15 +15904,14 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
 			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
+		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
-			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
-			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
-			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
+		.input	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
+			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
+			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
 		.ilen	= 32,
 		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
@@ -14040,75 +15921,74 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
+			  "\x31\x41\x59\x26\x53\x58\x97\x93",
+		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
-			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
-			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
-			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
-			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
-			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
-			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
-			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
-			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
-			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
-			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
-			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
-			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
-			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
-			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
-			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
-			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
-			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
-			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
-			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
-			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
-			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
-			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
-			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
-			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
-			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
-			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
-			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
-			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
-			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
-			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
-			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
-			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
-			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
-			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
-			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
-			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
-			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
-			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
-			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
-			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
-			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
-			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
-			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
-			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
-			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
-			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
-			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
-			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
-			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
-			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
-			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
-			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
-			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
-			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
-			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
-			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
-			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
-			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
-			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
-			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
-			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
-			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
-			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
+		.input	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
+			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
+			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
+			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
+			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
+			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
+			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
+			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
+			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
+			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
+			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
+			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
+			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
+			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
+			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
+			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
+			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
+			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
+			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
+			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
+			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
+			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
+			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
+			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
+			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
+			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
+			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
+			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
+			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
+			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
+			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
+			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
+			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
+			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
+			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
+			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
+			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
+			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
+			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
+			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
+			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
+			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
+			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
+			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
+			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
+			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
+			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
+			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
+			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
+			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
+			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
+			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
+			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
+			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
+			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
+			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
+			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
+			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
+			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
+			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
+			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
+			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
+			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
 		.ilen	= 512,
 		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
@@ -14179,78 +16059,74 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
 			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
+			  "\x99\x59\x57\x49\x66\x96\x76\x27",
+		.klen	= 32,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
-			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
-			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
-			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
-			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
-			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
-			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
-			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
-			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
-			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
-			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
-			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
-			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
-			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
-			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
-			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
-			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
-			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
-			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
-			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
-			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
-			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
-			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
-			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
-			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
-			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
-			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
-			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
-			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
-			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
-			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
-			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
-			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
-			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
-			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
-			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
-			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
-			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
-			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
-			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
-			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
-			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
-			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
-			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
-			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
-			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
-			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
-			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
-			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
-			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
-			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
-			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
-			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
-			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
-			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
-			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
-			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
-			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
-			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
-			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
-			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
-			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
-			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
-			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
+		.input	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
+			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
+			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
+			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
+			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
+			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
+			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
+			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
+			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
+			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
+			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
+			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
+			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
+			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
+			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
+			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
+			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
+			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
+			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
+			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
+			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
+			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
+			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
+			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
+			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
+			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
+			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
+			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
+			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
+			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
+			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
+			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
+			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
+			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
+			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
+			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
+			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
+			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
+			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
+			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
+			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
+			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
+			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
+			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
+			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
+			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
+			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
+			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
+			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
+			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
+			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
+			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
+			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
+			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
+			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
+			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
+			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
+			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
+			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
+			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
+			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
+			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
+			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
 		.ilen	= 512,
 		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
@@ -14320,7 +16196,7 @@ static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
-	},
+	}
 };

 /* Cast6 test vectors from RFC 2612 */

+ 0 - 72
crypto/xts.c

@@ -357,78 +357,6 @@ static int decrypt(struct skcipher_request *req)
 	return do_decrypt(req, init_crypt(req, decrypt_done));
 }

-int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
-	      struct scatterlist *ssrc, unsigned int nbytes,
-	      struct xts_crypt_req *req)
-{
-	const unsigned int bsize = XTS_BLOCK_SIZE;
-	const unsigned int max_blks = req->tbuflen / bsize;
-	struct blkcipher_walk walk;
-	unsigned int nblocks;
-	le128 *src, *dst, *t;
-	le128 *t_buf = req->tbuf;
-	int err, i;
-
-	BUG_ON(max_blks < 1);
-
-	blkcipher_walk_init(&walk, sdst, ssrc, nbytes);
-
-	err = blkcipher_walk_virt(desc, &walk);
-	nbytes = walk.nbytes;
-	if (!nbytes)
-		return err;
-
-	nblocks = min(nbytes / bsize, max_blks);
-	src = (le128 *)walk.src.virt.addr;
-	dst = (le128 *)walk.dst.virt.addr;
-
-	/* calculate first value of T */
-	req->tweak_fn(req->tweak_ctx, (u8 *)&t_buf[0], walk.iv);
-
-	i = 0;
-	goto first;
-
-	for (;;) {
-		do {
-			for (i = 0; i < nblocks; i++) {
-				gf128mul_x_ble(&t_buf[i], t);
-first:
-				t = &t_buf[i];
-
-				/* PP <- T xor P */
-				le128_xor(dst + i, t, src + i);
-			}
-
-			/* CC <- E(Key2,PP) */
-			req->crypt_fn(req->crypt_ctx, (u8 *)dst,
-				      nblocks * bsize);
-
-			/* C <- T xor CC */
-			for (i = 0; i < nblocks; i++)
-				le128_xor(dst + i, dst + i, &t_buf[i]);
-
-			src += nblocks;
-			dst += nblocks;
-			nbytes -= nblocks * bsize;
-			nblocks = min(nbytes / bsize, max_blks);
-		} while (nblocks > 0);
-
-		*(le128 *)walk.iv = *t;
-
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-		nbytes = walk.nbytes;
-		if (!nbytes)
-			break;
-
-		nblocks = min(nbytes / bsize, max_blks);
-		src = (le128 *)walk.src.virt.addr;
-		dst = (le128 *)walk.dst.virt.addr;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(xts_crypt);
-
 static int init_tfm(struct crypto_skcipher *tfm)
 {
 	struct skcipher_instance *inst = skcipher_alg_instance(tfm);

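For reference, the xts_crypt() helper removed above walked the data with blkcipher_walk and applied the standard XTS block transform over le128 vectors: the tweak T starts from the encrypted IV, each block is XORed with T, encrypted with the data key, XORed with T again, and T is multiplied by x in GF(2^128) for the next block (gf128mul_x_ble in the helper). The standalone sketch below mirrors those steps byte-wise; it is illustrative only, not kernel code, and the function names and the encrypt_data callback are made-up stand-ins.

typedef unsigned char u8;
typedef void (*xts_block_fn)(u8 dst[16], const u8 src[16]);

/* T <- T * x in GF(2^128), little-endian layout (cf. gf128mul_x_ble above) */
static void xts_mul_x_sketch(u8 t[16])
{
	unsigned int carry = 0, i;

	for (i = 0; i < 16; i++) {		/* byte 0 is least significant */
		unsigned int msb = t[i] >> 7;

		t[i] = (u8)((t[i] << 1) | carry);
		carry = msb;
	}
	if (carry)
		t[0] ^= 0x87;			/* reduce by x^128 + x^7 + x^2 + x + 1 */
}

/* One XTS block, as in the removed helper: PP <- T xor P, CC <- E(Key2, PP), C <- T xor CC */
static void xts_one_block_sketch(xts_block_fn encrypt_data, u8 dst[16],
				 const u8 src[16], u8 tweak[16])
{
	u8 pp[16];
	int i;

	for (i = 0; i < 16; i++)		/* PP <- T xor P */
		pp[i] = src[i] ^ tweak[i];
	encrypt_data(dst, pp);			/* CC <- E(Key2, PP), stand-in callback */
	for (i = 0; i < 16; i++)		/* C <- T xor CC */
		dst[i] ^= tweak[i];
	xts_mul_x_sketch(tweak);		/* advance T for the next block */
}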
+ 7 - 0
drivers/char/hw_random/Kconfig

@@ -452,3 +452,10 @@ config UML_RANDOM
 	  (check your distro, or download from
 	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
 	  /dev/hwrng and injects the entropy into /dev/random.
+
+config HW_RANDOM_KEYSTONE
+	depends on ARCH_KEYSTONE
+	default HW_RANDOM
+	tristate "TI Keystone NETCP SA Hardware random number generator"
+	help
+	  This option enables Keystone's hardware random generator.

+ 1 - 0
drivers/char/hw_random/Makefile

@@ -38,3 +38,4 @@ obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
 obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o
 obj-$(CONFIG_HW_RANDOM_MTK)	+= mtk-rng.o
 obj-$(CONFIG_HW_RANDOM_S390) += s390-trng.o
+obj-$(CONFIG_HW_RANDOM_KEYSTONE) += ks-sa-rng.o

+ 2 - 0
drivers/char/hw_random/bcm2835-rng.c

@@ -163,6 +163,8 @@ static int bcm2835_rng_probe(struct platform_device *pdev)

 	/* Clock is optional on most platforms */
 	priv->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;

 	priv->rng.name = pdev->name;
 	priv->rng.init = bcm2835_rng_init;

+ 1 - 1
drivers/char/hw_random/cavium-rng-vf.c

@@ -77,7 +77,7 @@ static int cavium_rng_probe_vf(struct	pci_dev		*pdev,
 }

 /* Remove the VF */
-void  cavium_rng_remove_vf(struct pci_dev *pdev)
+static void  cavium_rng_remove_vf(struct pci_dev *pdev)
 {
 	struct cavium_rng *rng;


+ 1 - 1
drivers/char/hw_random/cavium-rng.c

@@ -62,7 +62,7 @@ static int cavium_rng_probe(struct pci_dev *pdev,
 }

 /* Disable VF and RNG Hardware */
-void  cavium_rng_remove(struct pci_dev *pdev)
+static void cavium_rng_remove(struct pci_dev *pdev)
 {
 	struct cavium_rng_pf *rng;


+ 1 - 1
drivers/char/hw_random/imx-rngc.c

@@ -300,7 +300,7 @@ static int __maybe_unused imx_rngc_resume(struct device *dev)
 	return 0;
 }

-SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
+static SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);

 static const struct of_device_id imx_rngc_dt_ids[] = {
 	{ .compatible = "fsl,imx25-rngb", .data = NULL, },

+ 257 - 0
drivers/char/hw_random/ks-sa-rng.c

@@ -0,0 +1,257 @@
+/*
+ * Random Number Generator driver for the Keystone SOC
+ *
+ * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors:	Sandeep Nair
+ *		Vitaly Andrianov
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+
+#define SA_CMD_STATUS_OFS			0x8
+
+/* TRNG enable control in SA System module*/
+#define SA_CMD_STATUS_REG_TRNG_ENABLE		BIT(3)
+
+/* TRNG start control in TRNG module */
+#define TRNG_CNTL_REG_TRNG_ENABLE		BIT(10)
+
+/* Data ready indicator in STATUS register */
+#define TRNG_STATUS_REG_READY			BIT(0)
+
+/* Data ready clear control in INTACK register */
+#define TRNG_INTACK_REG_READY			BIT(0)
+
+/*
+ * Number of samples taken to gather entropy during startup.
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^8.
+ */
+#define TRNG_DEF_STARTUP_CYCLES			0
+#define TRNG_CNTL_REG_STARTUP_CYCLES_SHIFT	16
+
+/*
+ * Minimum number of samples taken to regenerate entropy
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^6.
+ */
+#define TRNG_DEF_MIN_REFILL_CYCLES		1
+#define TRNG_CFG_REG_MIN_REFILL_CYCLES_SHIFT	0
+
+/*
+ * Maximum number of samples taken to regenerate entropy
+ * If value is 0, the number of samples is 2^24 else
+ * equals value times 2^8.
+ */
+#define TRNG_DEF_MAX_REFILL_CYCLES		0
+#define TRNG_CFG_REG_MAX_REFILL_CYCLES_SHIFT	16
+
+/* Number of CLK input cycles between samples */
+#define TRNG_DEF_CLK_DIV_CYCLES			0
+#define TRNG_CFG_REG_SAMPLE_DIV_SHIFT		8
+
+/* Maximum retries to get rng data */
+#define SA_MAX_RNG_DATA_RETRIES			5
+/* Delay between retries (in usecs) */
+#define SA_RNG_DATA_RETRY_DELAY			5
+
+struct trng_regs {
+	u32	output_l;
+	u32	output_h;
+	u32	status;
+	u32	intmask;
+	u32	intack;
+	u32	control;
+	u32	config;
+};
+
+struct ks_sa_rng {
+	struct device	*dev;
+	struct hwrng	rng;
+	struct clk	*clk;
+	struct regmap	*regmap_cfg;
+	struct trng_regs *reg_rng;
+};
+
+static int ks_sa_rng_init(struct hwrng *rng)
+{
+	u32 value;
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Enable RNG module */
+	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE);
+
+	/* Configure RNG module */
+	writel(0, &ks_sa_rng->reg_rng->control);
+	value = TRNG_DEF_STARTUP_CYCLES << TRNG_CNTL_REG_STARTUP_CYCLES_SHIFT;
+	writel(value, &ks_sa_rng->reg_rng->control);
+
+	value =	(TRNG_DEF_MIN_REFILL_CYCLES <<
+		 TRNG_CFG_REG_MIN_REFILL_CYCLES_SHIFT) |
+		(TRNG_DEF_MAX_REFILL_CYCLES <<
+		 TRNG_CFG_REG_MAX_REFILL_CYCLES_SHIFT) |
+		(TRNG_DEF_CLK_DIV_CYCLES <<
+		 TRNG_CFG_REG_SAMPLE_DIV_SHIFT);
+
+	writel(value, &ks_sa_rng->reg_rng->config);
+
+	/* Disable all interrupts from TRNG */
+	writel(0, &ks_sa_rng->reg_rng->intmask);
+
+	/* Enable RNG */
+	value = readl(&ks_sa_rng->reg_rng->control);
+	value |= TRNG_CNTL_REG_TRNG_ENABLE;
+	writel(value, &ks_sa_rng->reg_rng->control);
+
+	return 0;
+}
+
+static void ks_sa_rng_cleanup(struct hwrng *rng)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Disable RNG */
+	writel(0, &ks_sa_rng->reg_rng->control);
+	regmap_write_bits(ks_sa_rng->regmap_cfg, SA_CMD_STATUS_OFS,
+			  SA_CMD_STATUS_REG_TRNG_ENABLE, 0);
+}
+
+static int ks_sa_rng_data_read(struct hwrng *rng, u32 *data)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	/* Read random data */
+	data[0] = readl(&ks_sa_rng->reg_rng->output_l);
+	data[1] = readl(&ks_sa_rng->reg_rng->output_h);
+
+	writel(TRNG_INTACK_REG_READY, &ks_sa_rng->reg_rng->intack);
+
+	return sizeof(u32) * 2;
+}
+
+static int ks_sa_rng_data_present(struct hwrng *rng, int wait)
+{
+	struct device *dev = (struct device *)rng->priv;
+	struct ks_sa_rng *ks_sa_rng = dev_get_drvdata(dev);
+
+	u32	ready;
+	int	j;
+
+	for (j = 0; j < SA_MAX_RNG_DATA_RETRIES; j++) {
+		ready = readl(&ks_sa_rng->reg_rng->status);
+		ready &= TRNG_STATUS_REG_READY;
+
+		if (ready || !wait)
+			break;
+
+		udelay(SA_RNG_DATA_RETRY_DELAY);
+	}
+
+	return ready;
+}
+
+static int ks_sa_rng_probe(struct platform_device *pdev)
+{
+	struct ks_sa_rng	*ks_sa_rng;
+	struct device		*dev = &pdev->dev;
+	int			ret;
+	struct resource		*mem;
+
+	ks_sa_rng = devm_kzalloc(dev, sizeof(*ks_sa_rng), GFP_KERNEL);
+	if (!ks_sa_rng)
+		return -ENOMEM;
+
+	ks_sa_rng->dev = dev;
+	ks_sa_rng->rng = (struct hwrng) {
+		.name = "ks_sa_hwrng",
+		.init = ks_sa_rng_init,
+		.data_read = ks_sa_rng_data_read,
+		.data_present = ks_sa_rng_data_present,
+		.cleanup = ks_sa_rng_cleanup,
+	};
+	ks_sa_rng->rng.priv = (unsigned long)dev;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ks_sa_rng->reg_rng = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(ks_sa_rng->reg_rng))
+		return PTR_ERR(ks_sa_rng->reg_rng);
+
+	ks_sa_rng->regmap_cfg =
+		syscon_regmap_lookup_by_phandle(dev->of_node,
+						"ti,syscon-sa-cfg");
+
+	if (IS_ERR(ks_sa_rng->regmap_cfg)) {
+		dev_err(dev, "syscon_node_to_regmap failed\n");
+		return -EINVAL;
+	}
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable SA power-domain\n");
+		pm_runtime_disable(dev);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, ks_sa_rng);
+
+	return devm_hwrng_register(&pdev->dev, &ks_sa_rng->rng);
+}
+
+static int ks_sa_rng_remove(struct platform_device *pdev)
+{
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id ks_sa_rng_dt_match[] = {
+	{
+		.compatible = "ti,keystone-rng",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ks_sa_rng_dt_match);
+
+static struct platform_driver ks_sa_rng_driver = {
+	.driver		= {
+		.name	= "ks-sa-rng",
+		.of_match_table = ks_sa_rng_dt_match,
+	},
+	.probe		= ks_sa_rng_probe,
+	.remove		= ks_sa_rng_remove,
+};
+
+module_platform_driver(ks_sa_rng_driver);
+
+MODULE_DESCRIPTION("Keystone NETCP SA H/W Random Number Generator driver");
+MODULE_AUTHOR("Vitaly Andrianov <vitalya@ti.com>");
+MODULE_LICENSE("GPL");

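The TRNG configuration comments in the new ks-sa-rng.c above describe how the cycle-count register fields map to entropy sample counts: a field value of 0 selects 2^24 samples, otherwise the count is the value scaled by 2^8 (startup and max refill) or 2^6 (min refill). A small illustrative helper, not part of the driver, makes that mapping explicit:

/*
 * Illustrative only, not part of the driver above.  Maps a TRNG
 * cycle-count register field to the number of entropy samples it
 * configures, per the comments in ks-sa-rng.c: 0 selects 2^24 samples,
 * any other value is multiplied by the field's scale factor
 * (2^8 for startup/max refill, 2^6 for min refill).
 */
static inline unsigned long trng_field_to_samples(unsigned int field_val,
						  unsigned int scale_shift)
{
	return field_val ? (unsigned long)field_val << scale_shift
			 : 1UL << 24;
}

/*
 * With the driver defaults:
 *   trng_field_to_samples(TRNG_DEF_STARTUP_CYCLES, 8)    -> 2^24 samples
 *   trng_field_to_samples(TRNG_DEF_MIN_REFILL_CYCLES, 6) -> 64 samples
 */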
+ 14 - 9
drivers/char/hw_random/mxc-rnga.c

@@ -16,16 +16,13 @@
  * This driver is based on other RNG drivers.
  */

-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
 #include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-#include <linux/hw_random.h>
 #include <linux/delay.h>
+#include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>

 /* RNGA Registers */
 #define RNGA_CONTROL			0x00
@@ -197,10 +194,18 @@ static int __exit mxc_rnga_remove(struct platform_device *pdev)
 	return 0;
 }

+static const struct of_device_id mxc_rnga_of_match[] = {
+	{ .compatible = "fsl,imx21-rnga", },
+	{ .compatible = "fsl,imx31-rnga", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mxc_rnga_of_match);
+
 static struct platform_driver mxc_rnga_driver = {
 	.driver = {
-		   .name = "mxc_rnga",
-		   },
+		.name = "mxc_rnga",
+		.of_match_table = mxc_rnga_of_match,
+	},
 	.remove = __exit_p(mxc_rnga_remove),
 };


+ 18 - 4
drivers/char/hw_random/omap-rng.c

@@ -150,6 +150,7 @@ struct omap_rng_dev {
 	const struct omap_rng_pdata	*pdata;
 	struct hwrng rng;
 	struct clk 			*clk;
+	struct clk			*clk_reg;
 };

 static inline u32 omap_rng_read(struct omap_rng_dev *priv, u16 reg)
@@ -480,6 +481,19 @@ static int omap_rng_probe(struct platform_device *pdev)
 		}
 	}

+	priv->clk_reg = devm_clk_get(&pdev->dev, "reg");
+	if (IS_ERR(priv->clk_reg) && PTR_ERR(priv->clk_reg) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+	if (!IS_ERR(priv->clk_reg)) {
+		ret = clk_prepare_enable(priv->clk_reg);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Unable to enable the register clk: %d\n",
+				ret);
+			goto err_register;
+		}
+	}
+
 	ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) :
 				get_omap_rng_device_details(priv);
 	if (ret)
@@ -499,8 +513,8 @@ err_register:
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);

-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_reg);
+	clk_disable_unprepare(priv->clk);
 err_ioremap:
 	dev_err(dev, "initialization failed.\n");
 	return ret;
@@ -517,8 +531,8 @@ static int omap_rng_remove(struct platform_device *pdev)
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);

-	if (!IS_ERR(priv->clk))
-		clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->clk_reg);

 	return 0;
 }

+ 28 - 16
drivers/char/hw_random/stm32-rng.c

@@ -16,15 +16,18 @@
 #include <linux/delay.h>
 #include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/pm_runtime.h>
+#include <linux/reset.h>
 #include <linux/slab.h>

 #define RNG_CR 0x00
 #define RNG_CR_RNGEN BIT(2)
+#define RNG_CR_CED BIT(5)

 #define RNG_SR 0x04
 #define RNG_SR_SEIS BIT(6)
@@ -33,19 +36,12 @@

 #define RNG_DR 0x08

-/*
- * It takes 40 cycles @ 48MHz to generate each random number (e.g. <1us).
- * At the time of writing STM32 parts max out at ~200MHz meaning a timeout
- * of 500 leaves us a very comfortable margin for error. The loop to which
- * the timeout applies takes at least 4 instructions per iteration so the
- * timeout is enough to take us up to multi-GHz parts!
- */
-#define RNG_TIMEOUT 500
-
 struct stm32_rng_private {
 	struct hwrng rng;
 	void __iomem *base;
 	struct clk *clk;
+	struct reset_control *rst;
+	bool ced;
 };

 static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
@@ -59,13 +55,16 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)

 	while (max > sizeof(u32)) {
 		sr = readl_relaxed(priv->base + RNG_SR);
+		/* Manage timeout which is based on timer and take */
+		/* care of initial delay time when enabling rng	*/
 		if (!sr && wait) {
-			unsigned int timeout = RNG_TIMEOUT;
-
-			do {
-				cpu_relax();
-				sr = readl_relaxed(priv->base + RNG_SR);
-			} while (!sr && --timeout);
+			retval = readl_relaxed_poll_timeout_atomic(priv->base
+								   + RNG_SR,
+								   sr, sr,
+								   10, 50000);
+			if (retval)
+				dev_err((struct device *)priv->rng.priv,
+					"%s: timeout %x!\n", __func__, sr);
 		}

 		/* If error detected or data not ready... */
@@ -99,7 +98,11 @@ static int stm32_rng_init(struct hwrng *rng)
 	if (err)
 		return err;

-	writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+	if (priv->ced)
+		writel_relaxed(RNG_CR_RNGEN, priv->base + RNG_CR);
+	else
+		writel_relaxed(RNG_CR_RNGEN | RNG_CR_CED,
+			       priv->base + RNG_CR);

 	/* clear error indicators */
 	writel_relaxed(0, priv->base + RNG_SR);
@@ -140,6 +143,15 @@ static int stm32_rng_probe(struct platform_device *ofdev)
 	if (IS_ERR(priv->clk))
 		return PTR_ERR(priv->clk);

+	priv->rst = devm_reset_control_get(&ofdev->dev, NULL);
+	if (!IS_ERR(priv->rst)) {
+		reset_control_assert(priv->rst);
+		udelay(2);
+		reset_control_deassert(priv->rst);
+	}
+
+	priv->ced = of_property_read_bool(np, "clock-error-detect");
+
 	dev_set_drvdata(dev, priv);

 	priv->rng.name = dev_driver_string(dev),

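The stm32-rng change above replaces the open-coded busy-wait with readl_relaxed_poll_timeout_atomic(priv->base + RNG_SR, sr, sr, 10, 50000), which polls the status register until the condition holds or the timeout expires. Roughly, the helper behaves like the sketch below; this is an illustrative expansion only, not the actual <linux/iopoll.h> implementation, and poll_sr_sketch() is a made-up name.

/*
 * Illustrative expansion of
 *   readl_relaxed_poll_timeout_atomic(addr, sr, sr, 10, 50000);
 * poll every 10 us, give up with -ETIMEDOUT after roughly 50 ms.
 */
static int poll_sr_sketch(void __iomem *addr, u32 *sr)
{
	unsigned long left_us = 50000;

	for (;;) {
		*sr = readl_relaxed(addr);	/* read the status register */
		if (*sr)			/* condition "sr non-zero" met */
			return 0;
		if (left_us < 10)
			return -ETIMEDOUT;
		udelay(10);			/* fixed delay, safe in atomic context */
		left_us -= 10;
	}
}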
+ 27 - 7
drivers/crypto/Kconfig

@@ -464,13 +464,6 @@ if CRYPTO_DEV_UX500
 	source "drivers/crypto/ux500/Kconfig"
 endif # if CRYPTO_DEV_UX500

-config CRYPTO_DEV_BFIN_CRC
-	tristate "Support for Blackfin CRC hardware"
-	depends on BF60x
-	help
-	  Newer Blackfin processors have CRC hardware. Select this if you
-	  want to use the Blackfin CRC module.
-
 config CRYPTO_DEV_ATMEL_AUTHENC
 	tristate "Support for Atmel IPSEC/SSL hw accelerator"
 	depends on HAS_DMA
@@ -730,4 +723,31 @@ config CRYPTO_DEV_ARTPEC6

 	  To compile this driver as a module, choose M here.

+config CRYPTO_DEV_CCREE
+	tristate "Support for ARM TrustZone CryptoCell family of security processors"
+	depends on CRYPTO && CRYPTO_HW && OF && HAS_DMA
+	default n
+	select CRYPTO_HASH
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_DES
+	select CRYPTO_AEAD
+	select CRYPTO_AUTHENC
+	select CRYPTO_SHA1
+	select CRYPTO_MD5
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	select CRYPTO_HMAC
+	select CRYPTO_AES
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_CTR
+	select CRYPTO_XTS
+	help
+	  Say 'Y' to enable a driver for the REE interface of the Arm
+	  TrustZone CryptoCell family of processors. Currently the
+	  CryptoCell 712, 710 and 630 are supported.
+	  Choose this if you wish to use hardware acceleration of
+	  cryptographic operations on the system REE.
+	  If unsure say Y.
+
 endif # CRYPTO_HW

+ 1 - 1
drivers/crypto/Makefile

@@ -3,9 +3,9 @@ obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
-obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
 obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
+obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
 obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
 obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/

+ 2 - 6
drivers/crypto/atmel-aes.c

@@ -2155,7 +2155,7 @@ static int atmel_aes_authenc_setkey(struct crypto_aead *tfm, const u8 *key,

 badkey:
 	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	memzero_explicit(&key, sizeof(keys));
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 }

@@ -2602,16 +2602,13 @@ static struct crypto_platform_data *atmel_aes_of_init(struct platform_device *pd
 	}

 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}

 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
 	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
 		devm_kfree(&pdev->dev, pdata);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -2649,7 +2646,6 @@ static int atmel_aes_probe(struct platform_device *pdev)

 	aes_dd = devm_kzalloc(&pdev->dev, sizeof(*aes_dd), GFP_KERNEL);
 	if (aes_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto aes_dd_err;
 	}

+ 2 - 7
drivers/crypto/atmel-sha.c

@@ -2726,18 +2726,14 @@ static struct crypto_platform_data *atmel_sha_of_init(struct platform_device *pd
 	}

 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}

 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
+	if (!pdata->dma_slave)
 		return ERR_PTR(-ENOMEM);
-	}

 	return pdata;
 }
@@ -2758,7 +2754,6 @@ static int atmel_sha_probe(struct platform_device *pdev)

 	sha_dd = devm_kzalloc(&pdev->dev, sizeof(*sha_dd), GFP_KERNEL);
 	if (sha_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto sha_dd_err;
 	}

+ 2 - 7
drivers/crypto/atmel-tdes.c

@@ -1312,18 +1312,14 @@ static struct crypto_platform_data *atmel_tdes_of_init(struct platform_device *p
 	}

 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata) {
-		dev_err(&pdev->dev, "could not allocate memory for pdata\n");
+	if (!pdata)
 		return ERR_PTR(-ENOMEM);
-	}

 	pdata->dma_slave = devm_kzalloc(&pdev->dev,
 					sizeof(*(pdata->dma_slave)),
 					GFP_KERNEL);
-	if (!pdata->dma_slave) {
-		dev_err(&pdev->dev, "could not allocate memory for dma_slave\n");
+	if (!pdata->dma_slave)
 		return ERR_PTR(-ENOMEM);
-	}

 	return pdata;
 }
@@ -1344,7 +1340,6 @@ static int atmel_tdes_probe(struct platform_device *pdev)

 	tdes_dd = devm_kmalloc(&pdev->dev, sizeof(*tdes_dd), GFP_KERNEL);
 	if (tdes_dd == NULL) {
-		dev_err(dev, "unable to alloc data struct.\n");
 		err = -ENOMEM;
 		goto tdes_dd_err;
 	}

+ 2 - 2
drivers/crypto/bcm/cipher.c

@@ -818,7 +818,7 @@ static int handle_ahash_req(struct iproc_reqctx_s *rctx)

 	/* AES hashing keeps key size in type field, so need to copy it here */
 	if (hash_parms.alg == HASH_ALG_AES)
-		hash_parms.type = cipher_parms.type;
+		hash_parms.type = (enum hash_type)cipher_parms.type;
 	else
 		hash_parms.type = spu->spu_hash_type(rctx->total_sent);

@@ -1409,7 +1409,7 @@ static int handle_aead_req(struct iproc_reqctx_s *rctx)
 						rctx->iv_ctr_len);

 	if (ctx->auth.alg == HASH_ALG_AES)
-		hash_parms.type = ctx->cipher_type;
+		hash_parms.type = (enum hash_type)ctx->cipher_type;

 	/* General case AAD padding (CCM and RFC4543 special cases below) */
 	aead_parms.aad_pad_len = spu->spu_gcm_ccm_pad_len(ctx->cipher.mode,

+ 0 - 1
drivers/crypto/bcm/util.c

@@ -279,7 +279,6 @@ int do_shash(unsigned char *name, unsigned char *result,
 	sdesc = kmalloc(size, GFP_KERNEL);
 	if (!sdesc) {
 		rc = -ENOMEM;
-		pr_err("%s: Memory allocation failure\n", __func__);
 		goto do_shash_err;
 	}
 	sdesc->shash.tfm = hash;

+ 0 - 743
drivers/crypto/bfin_crc.c

@@ -1,743 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Support Blackfin CRC HW acceleration.
- *
- * Copyright 2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2.
- */
-
-#include <linux/err.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/delay.h>
-#include <linux/crypto.h>
-#include <linux/cryptohash.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/algapi.h>
-#include <crypto/hash.h>
-#include <crypto/internal/hash.h>
-#include <asm/unaligned.h>
-
-#include <asm/dma.h>
-#include <asm/portmux.h>
-#include <asm/io.h>
-
-#include "bfin_crc.h"
-
-#define CRC_CCRYPTO_QUEUE_LENGTH	5
-
-#define DRIVER_NAME "bfin-hmac-crc"
-#define CHKSUM_DIGEST_SIZE      4
-#define CHKSUM_BLOCK_SIZE       1
-
-#define CRC_MAX_DMA_DESC	100
-
-#define CRC_CRYPTO_STATE_UPDATE		1
-#define CRC_CRYPTO_STATE_FINALUPDATE	2
-#define CRC_CRYPTO_STATE_FINISH		3
-
-struct bfin_crypto_crc {
-	struct list_head	list;
-	struct device		*dev;
-	spinlock_t		lock;
-
-	int			irq;
-	int			dma_ch;
-	u32			poly;
-	struct crc_register	*regs;
-
-	struct ahash_request	*req; /* current request in operation */
-	struct dma_desc_array	*sg_cpu; /* virt addr of sg dma descriptors */
-	dma_addr_t		sg_dma; /* phy addr of sg dma descriptors */
-	u8			*sg_mid_buf;
-	dma_addr_t		sg_mid_dma; /* phy addr of sg mid buffer */
-
-	struct tasklet_struct	done_task;
-	struct crypto_queue	queue; /* waiting requests */
-
-	u8			busy:1; /* crc device in operation flag */
-};
-
-static struct bfin_crypto_crc_list {
-	struct list_head	dev_list;
-	spinlock_t		lock;
-} crc_list;
-
-struct bfin_crypto_crc_reqctx {
-	struct bfin_crypto_crc	*crc;
-
-	unsigned int		total;	/* total request bytes */
-	size_t			sg_buflen; /* bytes for this update */
-	unsigned int		sg_nents;
-	struct scatterlist	*sg; /* sg list head for this update*/
-	struct scatterlist	bufsl[2]; /* chained sg list */
-
-	size_t			bufnext_len;
-	size_t			buflast_len;
-	u8			bufnext[CHKSUM_DIGEST_SIZE]; /* extra bytes for next udpate */
-	u8			buflast[CHKSUM_DIGEST_SIZE]; /* extra bytes from last udpate */
-
-	u8			flag;
-};
-
-struct bfin_crypto_crc_ctx {
-	struct bfin_crypto_crc	*crc;
-	u32			key;
-};
-
-/*
- * get element in scatter list by given index
- */
-static struct scatterlist *sg_get(struct scatterlist *sg_list, unsigned int nents,
-				unsigned int index)
-{
-	struct scatterlist *sg = NULL;
-	int i;
-
-	for_each_sg(sg_list, sg, nents, i)
-		if (i == index)
-			break;
-
-	return sg;
-}
-
-static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key)
-{
-	writel(0, &crc->regs->datacntrld);
-	writel(MODE_CALC_CRC << OPMODE_OFFSET, &crc->regs->control);
-	writel(key, &crc->regs->curresult);
-
-	/* setup CRC interrupts */
-	writel(CMPERRI | DCNTEXPI, &crc->regs->status);
-	writel(CMPERRI | DCNTEXPI, &crc->regs->intrenset);
-
-	return 0;
-}
-
-static int bfin_crypto_crc_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-	struct bfin_crypto_crc *crc;
-
-	dev_dbg(ctx->crc->dev, "crc_init\n");
-	spin_lock_bh(&crc_list.lock);
-	list_for_each_entry(crc, &crc_list.dev_list, list) {
-		crc_ctx->crc = crc;
-		break;
-	}
-	spin_unlock_bh(&crc_list.lock);
-
-	if (sg_nents(req->src) > CRC_MAX_DMA_DESC) {
-		dev_dbg(ctx->crc->dev, "init: requested sg list is too big > %d\n",
-			CRC_MAX_DMA_DESC);
-		return -EINVAL;
-	}
-
-	ctx->crc = crc;
-	ctx->bufnext_len = 0;
-	ctx->buflast_len = 0;
-	ctx->sg_buflen = 0;
-	ctx->total = 0;
-	ctx->flag = 0;
-
-	/* init crc results */
-	put_unaligned_le32(crc_ctx->key, req->result);
-
-	dev_dbg(ctx->crc->dev, "init: digest size: %d\n",
-		crypto_ahash_digestsize(tfm));
-
-	return bfin_crypto_crc_init_hw(crc, crc_ctx->key);
-}
-
-static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc)
-{
-	struct scatterlist *sg;
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(crc->req);
-	int i = 0, j = 0;
-	unsigned long dma_config;
-	unsigned int dma_count;
-	unsigned int dma_addr;
-	unsigned int mid_dma_count = 0;
-	int dma_mod;
-
-	dma_map_sg(crc->dev, ctx->sg, ctx->sg_nents, DMA_TO_DEVICE);
-
-	for_each_sg(ctx->sg, sg, ctx->sg_nents, j) {
-		dma_addr = sg_dma_address(sg);
-		/* deduce extra bytes in last sg */
-		if (sg_is_last(sg))
-			dma_count = sg_dma_len(sg) - ctx->bufnext_len;
-		else
-			dma_count = sg_dma_len(sg);
-
-		if (mid_dma_count) {
-			/* Append last middle dma buffer to 4 bytes with first
-			   bytes in current sg buffer. Move addr of current
-			   sg and deduce the length of current sg.
-			 */
-			memcpy(crc->sg_mid_buf +(i << 2) + mid_dma_count,
-				sg_virt(sg),
-				CHKSUM_DIGEST_SIZE - mid_dma_count);
-			dma_addr += CHKSUM_DIGEST_SIZE - mid_dma_count;
-			dma_count -= CHKSUM_DIGEST_SIZE - mid_dma_count;
-
-			dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 |
-				DMAEN | PSIZE_32 | WDSIZE_32;
-
-			/* setup new dma descriptor for next middle dma */
-			crc->sg_cpu[i].start_addr = crc->sg_mid_dma + (i << 2);
-			crc->sg_cpu[i].cfg = dma_config;
-			crc->sg_cpu[i].x_count = 1;
-			crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
-			dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-				"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-				i, crc->sg_cpu[i].start_addr,
-				crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-				crc->sg_cpu[i].x_modify);
-			i++;
-		}
-
-		dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32;
-		/* chop current sg dma len to multiple of 32 bits */
-		mid_dma_count = dma_count % 4;
-		dma_count &= ~0x3;
-
-		if (dma_addr % 4 == 0) {
-			dma_config |= WDSIZE_32;
-			dma_count >>= 2;
-			dma_mod = 4;
-		} else if (dma_addr % 2 == 0) {
-			dma_config |= WDSIZE_16;
-			dma_count >>= 1;
-			dma_mod = 2;
-		} else {
-			dma_config |= WDSIZE_8;
-			dma_mod = 1;
-		}
-
-		crc->sg_cpu[i].start_addr = dma_addr;
-		crc->sg_cpu[i].cfg = dma_config;
-		crc->sg_cpu[i].x_count = dma_count;
-		crc->sg_cpu[i].x_modify = dma_mod;
-		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-			"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-			i, crc->sg_cpu[i].start_addr,
-			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-			crc->sg_cpu[i].x_modify);
-		i++;
-
-		if (mid_dma_count) {
-			/* copy extra bytes to next middle dma buffer */
-			memcpy(crc->sg_mid_buf + (i << 2),
-				(u8*)sg_virt(sg) + (dma_count << 2),
-				mid_dma_count);
-		}
-	}
-
-	dma_config = DMAFLOW_ARRAY | RESTART | NDSIZE_3 | DMAEN | PSIZE_32 | WDSIZE_32;
-	/* For final update req, append the buffer for next update as well*/
-	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
-		ctx->flag == CRC_CRYPTO_STATE_FINISH)) {
-		crc->sg_cpu[i].start_addr = dma_map_single(crc->dev, ctx->bufnext,
-						CHKSUM_DIGEST_SIZE, DMA_TO_DEVICE);
-		crc->sg_cpu[i].cfg = dma_config;
-		crc->sg_cpu[i].x_count = 1;
-		crc->sg_cpu[i].x_modify = CHKSUM_DIGEST_SIZE;
-		dev_dbg(crc->dev, "%d: crc_dma: start_addr:0x%lx, "
-			"cfg:0x%x, x_count:0x%x, x_modify:0x%x\n",
-			i, crc->sg_cpu[i].start_addr,
-			crc->sg_cpu[i].cfg, crc->sg_cpu[i].x_count,
-			crc->sg_cpu[i].x_modify);
-		i++;
-	}
-
-	if (i == 0)
-		return;
-
-	/* Set the last descriptor to stop mode */
-	crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE);
-	crc->sg_cpu[i - 1].cfg |= DI_EN;
-	set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma);
-	set_dma_x_count(crc->dma_ch, 0);
-	set_dma_x_modify(crc->dma_ch, 0);
-	set_dma_config(crc->dma_ch, dma_config);
-}
-
-static int bfin_crypto_crc_handle_queue(struct bfin_crypto_crc *crc,
-				  struct ahash_request *req)
-{
-	struct crypto_async_request *async_req, *backlog;
-	struct bfin_crypto_crc_reqctx *ctx;
-	struct scatterlist *sg;
-	int ret = 0;
-	int nsg, i, j;
-	unsigned int nextlen;
-	unsigned long flags;
-	u32 reg;
-
-	spin_lock_irqsave(&crc->lock, flags);
-	if (req)
-		ret = ahash_enqueue_request(&crc->queue, req);
-	if (crc->busy) {
-		spin_unlock_irqrestore(&crc->lock, flags);
-		return ret;
-	}
-	backlog = crypto_get_backlog(&crc->queue);
-	async_req = crypto_dequeue_request(&crc->queue);
-	if (async_req)
-		crc->busy = 1;
-	spin_unlock_irqrestore(&crc->lock, flags);
-
-	if (!async_req)
-		return ret;
-
-	if (backlog)
-		backlog->complete(backlog, -EINPROGRESS);
-
-	req = ahash_request_cast(async_req);
-	crc->req = req;
-	ctx = ahash_request_ctx(req);
-	ctx->sg = NULL;
-	ctx->sg_buflen = 0;
-	ctx->sg_nents = 0;
-
-	dev_dbg(crc->dev, "handling new req, flag=%u, nbytes: %d\n",
-						ctx->flag, req->nbytes);
-
-	if (ctx->flag == CRC_CRYPTO_STATE_FINISH) {
-		if (ctx->bufnext_len == 0) {
-			crc->busy = 0;
-			return 0;
-		}
-
-		/* Pack last crc update buffer to 32bit */
-		memset(ctx->bufnext + ctx->bufnext_len, 0,
-				CHKSUM_DIGEST_SIZE - ctx->bufnext_len);
-	} else {
-		/* Pack small data which is less than 32bit to buffer for next update. */
-		if (ctx->bufnext_len + req->nbytes < CHKSUM_DIGEST_SIZE) {
-			memcpy(ctx->bufnext + ctx->bufnext_len,
-				sg_virt(req->src), req->nbytes);
-			ctx->bufnext_len += req->nbytes;
-			if (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE &&
-				ctx->bufnext_len) {
-				goto finish_update;
-			} else {
-				crc->busy = 0;
-				return 0;
-			}
-		}
-
-		if (ctx->bufnext_len) {
-			/* Chain in extra bytes of last update */
-			ctx->buflast_len = ctx->bufnext_len;
-			memcpy(ctx->buflast, ctx->bufnext, ctx->buflast_len);
-
-			nsg = ctx->sg_buflen ? 2 : 1;
-			sg_init_table(ctx->bufsl, nsg);
-			sg_set_buf(ctx->bufsl, ctx->buflast, ctx->buflast_len);
-			if (nsg > 1)
-				sg_chain(ctx->bufsl, nsg, req->src);
-			ctx->sg = ctx->bufsl;
-		} else
-			ctx->sg = req->src;
-
-		/* Chop crc buffer size to multiple of 32 bit */
-		nsg = sg_nents(ctx->sg);
-		ctx->sg_nents = nsg;
-		ctx->sg_buflen = ctx->buflast_len + req->nbytes;
-		ctx->bufnext_len = ctx->sg_buflen % 4;
-		ctx->sg_buflen &= ~0x3;
-
-		if (ctx->bufnext_len) {
-			/* copy extra bytes to buffer for next update */
-			memset(ctx->bufnext, 0, CHKSUM_DIGEST_SIZE);
-			nextlen = ctx->bufnext_len;
-			for (i = nsg - 1; i >= 0; i--) {
-				sg = sg_get(ctx->sg, nsg, i);
-				j = min(nextlen, sg_dma_len(sg));
-				memcpy(ctx->bufnext + nextlen - j,
-					sg_virt(sg) + sg_dma_len(sg) - j, j);
-				if (j == sg_dma_len(sg))
-					ctx->sg_nents--;
-				nextlen -= j;
-				if (nextlen == 0)
-					break;
-			}
-		}
-	}
-
-finish_update:
-	if (ctx->bufnext_len && (ctx->flag == CRC_CRYPTO_STATE_FINALUPDATE ||
-		ctx->flag == CRC_CRYPTO_STATE_FINISH))
-		ctx->sg_buflen += CHKSUM_DIGEST_SIZE;
-
-	/* set CRC data count before start DMA */
-	writel(ctx->sg_buflen >> 2, &crc->regs->datacnt);
-
-	/* setup and enable CRC DMA */
-	bfin_crypto_crc_config_dma(crc);
-
-	/* finally kick off CRC operation */
-	reg = readl(&crc->regs->control);
-	writel(reg | BLKEN, &crc->regs->control);
-
-	return -EINPROGRESS;
-}
-
-static int bfin_crypto_crc_update(struct ahash_request *req)
-{
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	if (!req->nbytes)
-		return 0;
-
-	dev_dbg(ctx->crc->dev, "crc_update\n");
-	ctx->total += req->nbytes;
-	ctx->flag = CRC_CRYPTO_STATE_UPDATE;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_final(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	dev_dbg(ctx->crc->dev, "crc_final\n");
-	ctx->flag = CRC_CRYPTO_STATE_FINISH;
-	crc_ctx->key = 0;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_finup(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-	struct bfin_crypto_crc_reqctx *ctx = ahash_request_ctx(req);
-
-	dev_dbg(ctx->crc->dev, "crc_finishupdate\n");
-	ctx->total += req->nbytes;
-	ctx->flag = CRC_CRYPTO_STATE_FINALUPDATE;
-	crc_ctx->key = 0;
-
-	return bfin_crypto_crc_handle_queue(ctx->crc, req);
-}
-
-static int bfin_crypto_crc_digest(struct ahash_request *req)
-{
-	int ret;
-
-	ret = bfin_crypto_crc_init(req);
-	if (ret)
-		return ret;
-
-	return bfin_crypto_crc_finup(req);
-}
-
-static int bfin_crypto_crc_setkey(struct crypto_ahash *tfm, const u8 *key,
-			unsigned int keylen)
-{
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_ahash_ctx(tfm);
-
-	dev_dbg(crc_ctx->crc->dev, "crc_setkey\n");
-	if (keylen != CHKSUM_DIGEST_SIZE) {
-		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	crc_ctx->key = get_unaligned_le32(key);
-
-	return 0;
-}
-
-static int bfin_crypto_crc_cra_init(struct crypto_tfm *tfm)
-{
-	struct bfin_crypto_crc_ctx *crc_ctx = crypto_tfm_ctx(tfm);
-
-	crc_ctx->key = 0;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct bfin_crypto_crc_reqctx));
-
-	return 0;
-}
-
-static void bfin_crypto_crc_cra_exit(struct crypto_tfm *tfm)
-{
-}
-
-static struct ahash_alg algs = {
-	.init		= bfin_crypto_crc_init,
-	.update		= bfin_crypto_crc_update,
-	.final		= bfin_crypto_crc_final,
-	.finup		= bfin_crypto_crc_finup,
-	.digest		= bfin_crypto_crc_digest,
-	.setkey		= bfin_crypto_crc_setkey,
-	.halg.digestsize	= CHKSUM_DIGEST_SIZE,
-	.halg.base	= {
-		.cra_name		= "hmac(crc32)",
-		.cra_driver_name	= DRIVER_NAME,
-		.cra_priority		= 100,
-		.cra_flags		= CRYPTO_ALG_TYPE_AHASH |
-						CRYPTO_ALG_ASYNC |
-						CRYPTO_ALG_OPTIONAL_KEY,
-		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct bfin_crypto_crc_ctx),
-		.cra_alignmask		= 3,
-		.cra_module		= THIS_MODULE,
-		.cra_init		= bfin_crypto_crc_cra_init,
-		.cra_exit		= bfin_crypto_crc_cra_exit,
-	}
-};
-
-static void bfin_crypto_crc_done_task(unsigned long data)
-{
-	struct bfin_crypto_crc *crc = (struct bfin_crypto_crc *)data;
-
-	bfin_crypto_crc_handle_queue(crc, NULL);
-}
-
-static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id)
-{
-	struct bfin_crypto_crc *crc = dev_id;
-	u32 reg;
-
-	if (readl(&crc->regs->status) & DCNTEXP) {
-		writel(DCNTEXP, &crc->regs->status);
-
-		/* prepare results */
-		put_unaligned_le32(readl(&crc->regs->result),
-			crc->req->result);
-
-		reg = readl(&crc->regs->control);
-		writel(reg & ~BLKEN, &crc->regs->control);
-		crc->busy = 0;
-
-		if (crc->req->base.complete)
-			crc->req->base.complete(&crc->req->base, 0);
-
-		tasklet_schedule(&crc->done_task);
-
-		return IRQ_HANDLED;
-	} else
-		return IRQ_NONE;
-}
-
-#ifdef CONFIG_PM
-/**
- *	bfin_crypto_crc_suspend - suspend crc device
- *	@pdev: device being suspended
- *	@state: requested suspend state
- */
-static int bfin_crypto_crc_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
-	int i = 100000;
-
-	while ((readl(&crc->regs->control) & BLKEN) && --i)
-		cpu_relax();
-
-	if (i == 0)
-		return -EBUSY;
-
-	return 0;
-}
-#else
-# define bfin_crypto_crc_suspend NULL
-#endif
-
-#define bfin_crypto_crc_resume NULL
-
-/**
- *	bfin_crypto_crc_probe - Initialize module
- *
- */
-static int bfin_crypto_crc_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct resource *res;
-	struct bfin_crypto_crc *crc;
-	unsigned int timeout = 100000;
-	int ret;
-
-	crc = devm_kzalloc(dev, sizeof(*crc), GFP_KERNEL);
-	if (!crc) {
-		dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n");
-		return -ENOMEM;
-	}
-
-	crc->dev = dev;
-
-	INIT_LIST_HEAD(&crc->list);
-	spin_lock_init(&crc->lock);
-	tasklet_init(&crc->done_task, bfin_crypto_crc_done_task, (unsigned long)crc);
-	crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	crc->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR((void *)crc->regs)) {
-		dev_err(&pdev->dev, "Cannot map CRC IO\n");
-		return PTR_ERR((void *)crc->regs);
-	}
-
-	crc->irq = platform_get_irq(pdev, 0);
-	if (crc->irq < 0) {
-		dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n");
-		return -ENOENT;
-	}
-
-	ret = devm_request_irq(dev, crc->irq, bfin_crypto_crc_handler,
-			IRQF_SHARED, dev_name(dev), crc);
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to request blackfin crc irq\n");
-		return ret;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "No CRC DMA channel specified\n");
-		return -ENOENT;
-	}
-	crc->dma_ch = res->start;
-
-	ret = request_dma(crc->dma_ch, dev_name(dev));
-	if (ret) {
-		dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n");
-		return ret;
-	}
-
-	crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL);
-	if (crc->sg_cpu == NULL) {
-		ret = -ENOMEM;
-		goto out_error_dma;
-	}
-	/*
-	 * need at most CRC_MAX_DMA_DESC sg + CRC_MAX_DMA_DESC middle  +
-	 * 1 last + 1 next dma descriptors
-	 */
-	crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1));
-	crc->sg_mid_dma = crc->sg_dma + sizeof(struct dma_desc_array)
-			* ((CRC_MAX_DMA_DESC + 1) << 1);
-
-	writel(0, &crc->regs->control);
-	crc->poly = (u32)pdev->dev.platform_data;
-	writel(crc->poly, &crc->regs->poly);
-
-	while (!(readl(&crc->regs->status) & LUTDONE) && (--timeout) > 0)
-		cpu_relax();
-
-	if (timeout == 0)
-		dev_info(&pdev->dev, "init crc poly timeout\n");
-
-	platform_set_drvdata(pdev, crc);
-
-	spin_lock(&crc_list.lock);
-	list_add(&crc->list, &crc_list.dev_list);
-	spin_unlock(&crc_list.lock);
-
-	if (list_is_singular(&crc_list.dev_list)) {
-		ret = crypto_register_ahash(&algs);
-		if (ret) {
-			dev_err(&pdev->dev,
-				"Can't register crypto ahash device\n");
-			goto out_error_dma;
-		}
-	}
-
-	dev_info(&pdev->dev, "initialized\n");
-
-	return 0;
-
-out_error_dma:
-	if (crc->sg_cpu)
-		dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
-	free_dma(crc->dma_ch);
-
-	return ret;
-}
-
-/**
- *	bfin_crypto_crc_remove - Remove the CRC device
- *
- */
-static int bfin_crypto_crc_remove(struct platform_device *pdev)
-{
-	struct bfin_crypto_crc *crc = platform_get_drvdata(pdev);
-
-	if (!crc)
-		return -ENODEV;
-
-	spin_lock(&crc_list.lock);
-	list_del(&crc->list);
-	spin_unlock(&crc_list.lock);
-
-	crypto_unregister_ahash(&algs);
-	tasklet_kill(&crc->done_task);
-	free_dma(crc->dma_ch);
-
-	return 0;
-}
-
-static struct platform_driver bfin_crypto_crc_driver = {
-	.probe     = bfin_crypto_crc_probe,
-	.remove    = bfin_crypto_crc_remove,
-	.suspend   = bfin_crypto_crc_suspend,
-	.resume    = bfin_crypto_crc_resume,
-	.driver    = {
-		.name  = DRIVER_NAME,
-	},
-};
-
-/**
- *	bfin_crypto_crc_mod_init - Initialize module
- *
- *	Checks the module params and registers the platform driver.
- *	Real work is in the platform probe function.
- */
-static int __init bfin_crypto_crc_mod_init(void)
-{
-	int ret;
-
-	pr_info("Blackfin hardware CRC crypto driver\n");
-
-	INIT_LIST_HEAD(&crc_list.dev_list);
-	spin_lock_init(&crc_list.lock);
-
-	ret = platform_driver_register(&bfin_crypto_crc_driver);
-	if (ret) {
-		pr_err("unable to register driver\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-/**
- *	bfin_crypto_crc_mod_exit - Deinitialize module
- */
-static void __exit bfin_crypto_crc_mod_exit(void)
-{
-	platform_driver_unregister(&bfin_crypto_crc_driver);
-}
-
-module_init(bfin_crypto_crc_mod_init);
-module_exit(bfin_crypto_crc_mod_exit);
-
-MODULE_AUTHOR("Sonic Zhang <sonic.zhang@analog.com>");
-MODULE_DESCRIPTION("Blackfin CRC hardware crypto driver");
-MODULE_LICENSE("GPL");
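
For context, a minimal sketch (not taken from this commit) of how a kernel caller could have driven the "hmac(crc32)" ahash that this driver registered; the function name, seed and data buffer are illustrative, and the data is assumed to live in linear kernel memory:

#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/hash.h>

static int bfin_crc_example(const void *data, unsigned int len)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	u8 seed[4] = { 0 };	/* 32-bit seed consumed by the driver's setkey() */
	u8 digest[4];		/* 32-bit CRC result */
	int ret;

	tfm = crypto_alloc_ahash("hmac(crc32)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}

	sg_init_one(&sg, data, len);
	crypto_ahash_setkey(tfm, seed, sizeof(seed));
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	ahash_request_set_crypt(req, &sg, digest, len);

	/* the algorithm is asynchronous, so wait for the request to complete */
	ret = crypto_wait_req(crypto_ahash_digest(req), &wait);

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return ret;
}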

+ 0 - 124
drivers/crypto/bfin_crc.h

@@ -1,124 +0,0 @@
-/*
- * bfin_crc.h - interface to Blackfin CRC controllers
- *
- * Copyright 2012 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#ifndef __BFIN_CRC_H__
-#define __BFIN_CRC_H__
-
-/* Function drivers which use the hardware CRC must initialize this structure */
-struct crc_info {
-	/* Input data address */
-	unsigned char *in_addr;
-	/* Output data address */
-	unsigned char *out_addr;
-	/* Input or output bytes */
-	unsigned long datasize;
-	union {
-	/* CRC to compare with that of input buffer */
-	unsigned long crc_compare;
-	/* Value to compare with input data */
-	unsigned long val_verify;
-	/* Value to fill */
-	unsigned long val_fill;
-	};
-	/* Value to program the 32b CRC Polynomial */
-	unsigned long crc_poly;
-	union {
-	/* CRC calculated from the input data */
-	unsigned long crc_result;
-	/* First failed position to verify input data */
-	unsigned long pos_verify;
-	};
-	/* CRC mirror flags */
-	unsigned int bitmirr:1;
-	unsigned int bytmirr:1;
-	unsigned int w16swp:1;
-	unsigned int fdsel:1;
-	unsigned int rsltmirr:1;
-	unsigned int polymirr:1;
-	unsigned int cmpmirr:1;
-};
-
-/* Userspace interface */
-#define CRC_IOC_MAGIC		'C'
-#define CRC_IOC_CALC_CRC	_IOWR('C', 0x01, unsigned int)
-#define CRC_IOC_MEMCPY_CRC	_IOWR('C', 0x02, unsigned int)
-#define CRC_IOC_VERIFY_VAL	_IOWR('C', 0x03, unsigned int)
-#define CRC_IOC_FILL_VAL	_IOWR('C', 0x04, unsigned int)
-
-
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-
-struct crc_register {
-	u32 control;
-	u32 datacnt;
-	u32 datacntrld;
-	u32 __pad_1[2];
-	u32 compare;
-	u32 fillval;
-	u32 datafifo;
-	u32 intren;
-	u32 intrenset;
-	u32 intrenclr;
-	u32 poly;
-	u32 __pad_2[4];
-	u32 status;
-	u32 datacntcap;
-	u32 __pad_3;
-	u32 result;
-	u32 curresult;
-	u32 __pad_4[3];
-	u32 revid;
-};
-
-/* CRC_STATUS Masks */
-#define CMPERR			0x00000002	/* Compare error */
-#define DCNTEXP			0x00000010	/* datacnt register expired */
-#define IBR			0x00010000	/* Input buffer ready */
-#define OBR			0x00020000	/* Output buffer ready */
-#define IRR			0x00040000	/* Immediate result ready */
-#define LUTDONE			0x00080000	/* Look-up table generation done */
-#define FSTAT			0x00700000	/* FIFO status */
-#define MAX_FIFO		4		/* Max fifo size */
-
-/* CRC_CONTROL Masks */
-#define BLKEN			0x00000001	/* Block enable */
-#define OPMODE			0x000000F0	/* Operation mode */
-#define OPMODE_OFFSET		4		/* Operation mode mask offset*/
-#define MODE_DMACPY_CRC		1		/* MTM CRC compute and compare */
-#define MODE_DATA_FILL		2		/* MTM data fill */
-#define MODE_CALC_CRC		3		/* MSM CRC compute and compare */
-#define MODE_DATA_VERIFY	4		/* MSM data verify */
-#define AUTOCLRZ		0x00000100	/* Auto clear to zero */
-#define AUTOCLRF		0x00000200	/* Auto clear to one */
-#define OBRSTALL		0x00001000	/* Stall on output buffer ready */
-#define IRRSTALL		0x00002000	/* Stall on immediate result ready */
-#define BITMIRR			0x00010000	/* Mirror bits within each byte of 32-bit input data */
-#define BITMIRR_OFFSET		16		/* Mirror bits offset */
-#define BYTMIRR			0x00020000	/* Mirror bytes of 32-bit input data */
-#define BYTMIRR_OFFSET		17		/* Mirror bytes offset */
-#define W16SWP			0x00040000	/* Mirror upper and lower 16-bit word of 32-bit input data */
-#define W16SWP_OFFSET		18		/* Mirror 16-bit word offset */
-#define FDSEL			0x00080000	/* FIFO is written after input data is mirrored */
-#define FDSEL_OFFSET		19		/* Mirror FIFO offset */
-#define RSLTMIRR		0x00100000	/* CRC result registers are mirrored. */
-#define RSLTMIRR_OFFSET		20		/* Mirror CRC result offset. */
-#define POLYMIRR		0x00200000	/* CRC poly register is mirrored. */
-#define POLYMIRR_OFFSET		21		/* Mirror CRC poly offset. */
-#define CMPMIRR			0x00400000	/* CRC compare register is mirrored. */
-#define CMPMIRR_OFFSET		22		/* Mirror CRC compare offset. */
-
-/* CRC_INTREN Masks */
-#define CMPERRI 		0x02		/* CRC_ERROR_INTR */
-#define DCNTEXPI 		0x10		/* CRC_STATUS_INTR */
-
-#endif
-
-#endif

+ 15 - 6
drivers/crypto/caam/caamalg.c

@@ -328,6 +328,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
 			ctx->cdata.keylen;
@@ -349,7 +350,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	}
 	}
 
 
 	desc = ctx->sh_desc_enc;
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ivsize, ctx->authsize, false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 				   desc_bytes(desc), ctx->dir);
 
 
@@ -366,7 +367,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	}
 	}
 
 
 	desc = ctx->sh_desc_dec;
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ivsize, ctx->authsize, false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 				   desc_bytes(desc), ctx->dir);
 
 
@@ -387,6 +388,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
 			ctx->cdata.keylen;
@@ -408,7 +410,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	}
 	}
 
 
 	desc = ctx->sh_desc_enc;
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 				   desc_bytes(desc), ctx->dir);
 
 
@@ -425,7 +428,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	}
 	}
 
 
 	desc = ctx->sh_desc_dec;
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 				   desc_bytes(desc), ctx->dir);
 
 
@@ -447,6 +451,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct device *jrdev = ctx->jrdev;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
 	u32 *desc;
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
 			ctx->cdata.keylen;
@@ -468,7 +473,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	}
 	}
 
 
 	desc = ctx->sh_desc_enc;
 	desc = ctx->sh_desc_enc;
-	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_enc_dma,
 				   desc_bytes(desc), ctx->dir);
 				   desc_bytes(desc), ctx->dir);
 
 
@@ -485,7 +491,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	}
 	}
 
 
 	desc = ctx->sh_desc_dec;
 	desc = ctx->sh_desc_dec;
-	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize);
+	cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ivsize, ctx->authsize,
+				  false);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_dec_dma,
 				   desc_bytes(desc), ctx->dir);
 				   desc_bytes(desc), ctx->dir);
 
 
@@ -563,9 +570,11 @@ static int aead_setkey(struct crypto_aead *aead,
 
 
 skip_split_key:
 skip_split_key:
 	ctx->cdata.keylen = keys.enckeylen;
 	ctx->cdata.keylen = keys.enckeylen;
+	memzero_explicit(&keys, sizeof(keys));
 	return aead_set_sh_desc(aead);
 	return aead_set_sh_desc(aead);
 badkey:
 badkey:
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 

+ 152 - 13
drivers/crypto/caam/caamalg_desc.c

@@ -625,10 +625,13 @@ EXPORT_SYMBOL(cnstr_shdsc_aead_givencap);
  * @desc: pointer to buffer used for descriptor construction
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
  */
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize)
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi)
 {
 {
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1,
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1,
 	    *zero_assoc_jump_cmd2;
 	    *zero_assoc_jump_cmd2;
@@ -650,11 +653,35 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 			 OP_ALG_ENCRYPT);
 
 
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		append_math_sub_imm_u32(desc, VARSEQOUTLEN, SEQINLEN, IMM,
+					ivsize);
+	} else {
+		append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0,
+				CAAM_CMD_SZ);
+	}
+
 	/* if assoclen + cryptlen is ZERO, skip to ICV write */
 	/* if assoclen + cryptlen is ZERO, skip to ICV write */
-	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
 	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
 						 JUMP_COND_MATH_Z);
 						 JUMP_COND_MATH_Z);
 
 
+	if (is_qi)
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+
 	/* if assoclen is ZERO, skip reading the assoc data */
 	/* if assoclen is ZERO, skip reading the assoc data */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
@@ -686,8 +713,11 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
 
 
-	/* jump the zero-payload commands */
-	append_jump(desc, JUMP_TEST_ALL | 2);
+	/* jump to ICV writing */
+	if (is_qi)
+		append_jump(desc, JUMP_TEST_ALL | 4);
+	else
+		append_jump(desc, JUMP_TEST_ALL | 2);
 
 
 	/* zero-payload commands */
 	/* zero-payload commands */
 	set_jump_tgt_here(desc, zero_payload_jump_cmd);
 	set_jump_tgt_here(desc, zero_payload_jump_cmd);
@@ -695,10 +725,18 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 	/* read assoc data */
 	/* read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
+	if (is_qi)
+		/* jump to ICV writing */
+		append_jump(desc, JUMP_TEST_ALL | 2);
 
 
 	/* There is no input data */
 	/* There is no input data */
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
 
 
+	if (is_qi)
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 |
+				     FIFOLD_TYPE_LAST1);
+
 	/* write ICV */
 	/* write ICV */
 	append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
 	append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
 			 LDST_SRCDST_BYTE_CONTEXT);
 			 LDST_SRCDST_BYTE_CONTEXT);
@@ -715,10 +753,13 @@ EXPORT_SYMBOL(cnstr_shdsc_gcm_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
  */
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize)
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi)
 {
 {
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1;
 	u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1;
 
 
@@ -739,6 +780,24 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
 
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* if assoclen is ZERO, skip reading the assoc data */
 	/* if assoclen is ZERO, skip reading the assoc data */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
 	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
@@ -791,10 +850,13 @@ EXPORT_SYMBOL(cnstr_shdsc_gcm_decap);
  * @desc: pointer to buffer used for descriptor construction
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
  */
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 {
 	u32 *key_jump_cmd;
 	u32 *key_jump_cmd;
 
 
@@ -815,7 +877,29 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 			 OP_ALG_ENCRYPT);
 
 
-	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
+	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 
 	/* Read assoc data */
 	/* Read assoc data */
@@ -823,7 +907,7 @@ void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
 
 	/* Skip IV */
 	/* Skip IV */
-	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 
 	/* Will read cryptlen bytes */
 	/* Will read cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
@@ -862,10 +946,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
  */
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 {
 	u32 *key_jump_cmd;
 	u32 *key_jump_cmd;
 
 
@@ -887,7 +974,29 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
 
-	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+	if (is_qi) {
+		u32 *wait_load_cmd;
+
+		/* REG3 = assoclen */
+		append_seq_load(desc, 4, LDST_CLASS_DECO |
+				LDST_SRCDST_WORD_DECO_MATH3 |
+				(4 << LDST_OFFSET_SHIFT));
+
+		wait_load_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_CALM | JUMP_COND_NCP |
+					    JUMP_COND_NOP | JUMP_COND_NIP |
+					    JUMP_COND_NIFP);
+		set_jump_tgt_here(desc, wait_load_cmd);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
+	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 
 	/* Read assoc data */
 	/* Read assoc data */
@@ -895,7 +1004,7 @@ void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
 
 	/* Skip IV */
 	/* Skip IV */
-	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 
 	/* Will read cryptlen bytes */
 	/* Will read cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
@@ -934,10 +1043,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap);
  * @desc: pointer to buffer used for descriptor construction
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
  */
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
 
@@ -958,6 +1070,18 @@ void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_ENCRYPT);
 			 OP_ALG_ENCRYPT);
 
 
+	if (is_qi) {
+		/* assoclen is not needed, skip it */
+		append_seq_fifo_load(desc, 4, FIFOLD_CLASS_SKIP);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* assoclen + cryptlen = seqinlen */
 	/* assoclen + cryptlen = seqinlen */
 	append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
 
 
@@ -1004,10 +1128,13 @@ EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap);
  * @desc: pointer to buffer used for descriptor construction
  * @desc: pointer to buffer used for descriptor construction
  * @cdata: pointer to block cipher transform definitions
  * @cdata: pointer to block cipher transform definitions
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
  *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_qi: true when called from caam/qi
  */
  */
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize)
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi)
 {
 {
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 	u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
 
 
@@ -1028,6 +1155,18 @@ void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 	append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 			 OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
 
+	if (is_qi) {
+		/* assoclen is not needed, skip it */
+		append_seq_fifo_load(desc, 4, FIFOLD_CLASS_SKIP);
+
+		/* Read salt and IV */
+		append_fifo_load_as_imm(desc, (void *)(cdata->key_virt +
+					cdata->keylen), 4, FIFOLD_CLASS_CLASS1 |
+					FIFOLD_TYPE_IV);
+		append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
+				     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	}
+
 	/* assoclen + cryptlen = seqoutlen */
 	/* assoclen + cryptlen = seqoutlen */
 	append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
 

+ 18 - 6
drivers/crypto/caam/caamalg_desc.h

@@ -27,14 +27,20 @@
 #define DESC_GCM_BASE			(3 * CAAM_CMD_SZ)
 #define DESC_GCM_ENC_LEN		(DESC_GCM_BASE + 16 * CAAM_CMD_SZ)
 #define DESC_GCM_DEC_LEN		(DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
+#define DESC_QI_GCM_ENC_LEN		(DESC_GCM_ENC_LEN + 6 * CAAM_CMD_SZ)
+#define DESC_QI_GCM_DEC_LEN		(DESC_GCM_DEC_LEN + 3 * CAAM_CMD_SZ)
 
 #define DESC_RFC4106_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
 #define DESC_RFC4106_DEC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4106_ENC_LEN		(DESC_RFC4106_ENC_LEN + 5 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4106_DEC_LEN		(DESC_RFC4106_DEC_LEN + 5 * CAAM_CMD_SZ)
 
 #define DESC_RFC4543_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_RFC4543_ENC_LEN		(DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
 #define DESC_RFC4543_DEC_LEN		(DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4543_ENC_LEN		(DESC_RFC4543_ENC_LEN + 4 * CAAM_CMD_SZ)
+#define DESC_QI_RFC4543_DEC_LEN		(DESC_RFC4543_DEC_LEN + 4 * CAAM_CMD_SZ)
 
 #define DESC_ABLKCIPHER_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_ABLKCIPHER_ENC_LEN		(DESC_ABLKCIPHER_BASE + \
@@ -67,22 +73,28 @@ void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
 			       const bool is_qi, int era);
 
 void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize);
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi);
 
 void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
-			   unsigned int icvsize);
+			   unsigned int ivsize, unsigned int icvsize,
+			   const bool is_qi);
 
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
-			       unsigned int icvsize);
+			       unsigned int ivsize, unsigned int icvsize,
+			       const bool is_qi);
 
 void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
 				  unsigned int ivsize, const bool is_rfc3686,

+ 387 - 1
drivers/crypto/caam/caamalg_qi.c

@@ -278,12 +278,317 @@ skip_split_key:
 		}
 		}
 	}
 	}
 
 
+	memzero_explicit(&keys, sizeof(keys));
 	return ret;
 	return ret;
 badkey:
 badkey:
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
+static int gcm_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_gcm_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+			      ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_GCM_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+		ctx->cdata.key_virt = ctx->key;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_gcm_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+			      ctx->authsize, true);
+
+	return 0;
+}
+
+static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	gcm_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int gcm_setkey(struct crypto_aead *aead,
+		      const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, ctx->dir);
+	ctx->cdata.keylen = keylen;
+
+	ret = gcm_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rfc4106_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4106_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4106_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4106_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4106_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4106_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	ret = rfc4106_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rfc4543_set_sh_desc(struct crypto_aead *aead)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
+
+	if (!ctx->cdata.keylen || !ctx->authsize)
+		return 0;
+
+	ctx->cdata.key_virt = ctx->key;
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_ENC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4543_encap(ctx->sh_desc_enc, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	/*
+	 * Job Descriptor and Shared Descriptor
+	 * must fit into the 64-word Descriptor h/w Buffer
+	 */
+	if (rem_bytes >= DESC_QI_RFC4543_DEC_LEN) {
+		ctx->cdata.key_inline = true;
+	} else {
+		ctx->cdata.key_inline = false;
+		ctx->cdata.key_dma = ctx->key_dma;
+	}
+
+	cnstr_shdsc_rfc4543_decap(ctx->sh_desc_dec, &ctx->cdata, ivsize,
+				  ctx->authsize, true);
+
+	return 0;
+}
+
+static int rfc4543_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+
+	ctx->authsize = authsize;
+	rfc4543_set_sh_desc(authenc);
+
+	return 0;
+}
+
+static int rfc4543_setkey(struct crypto_aead *aead,
+			  const u8 *key, unsigned int keylen)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	int ret;
+
+	if (keylen < 4)
+		return -EINVAL;
+
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "key in @" __stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
+#endif
+
+	memcpy(ctx->key, key, keylen);
+	/*
+	 * The last four bytes of the key material are used as the salt value
+	 * in the nonce. Update the AES key length.
+	 */
+	ctx->cdata.keylen = keylen - 4;
+	dma_sync_single_for_device(jrdev, ctx->key_dma, ctx->cdata.keylen,
+				   ctx->dir);
+
+	ret = rfc4543_set_sh_desc(aead);
+	if (ret)
+		return ret;
+
+	/* Now update the driver contexts with the new shared descriptor */
+	if (ctx->drv_ctx[ENCRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[ENCRYPT],
+					  ctx->sh_desc_enc);
+		if (ret) {
+			dev_err(jrdev, "driver enc context update failed\n");
+			return ret;
+		}
+	}
+
+	if (ctx->drv_ctx[DECRYPT]) {
+		ret = caam_drv_ctx_update(ctx->drv_ctx[DECRYPT],
+					  ctx->sh_desc_dec);
+		if (ret) {
+			dev_err(jrdev, "driver dec context update failed\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 			     const u8 *key, unsigned int keylen)
 			     const u8 *key, unsigned int keylen)
 {
 {
@@ -562,8 +867,18 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status)
 	qidev = caam_ctx->qidev;
 	qidev = caam_ctx->qidev;
 
 
 	if (unlikely(status)) {
 	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
 		caam_jr_strstatus(qidev, status);
 		caam_jr_strstatus(qidev, status);
-		ecode = -EIO;
+		/*
+		 * verify hw auth check passed else return -EBADMSG
+		 */
+		if (ssrc == JRSTA_SSRC_CCB_ERROR &&
+		    err_id == JRSTA_CCBERR_ERRID_ICVCHK)
+			ecode = -EBADMSG;
+		else
+			ecode = -EIO;
 	}
 	}
 
 
 	edesc = container_of(drv_req, typeof(*edesc), drv_req);
 	edesc = container_of(drv_req, typeof(*edesc), drv_req);
@@ -807,6 +1122,22 @@ static int aead_decrypt(struct aead_request *req)
 	return aead_crypt(req, false);
 	return aead_crypt(req, false);
 }
 }
 
 
+static int ipsec_gcm_encrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_crypt(req, true);
+}
+
+static int ipsec_gcm_decrypt(struct aead_request *req)
+{
+	if (req->assoclen < 8)
+		return -EINVAL;
+
+	return aead_crypt(req, false);
+}
+
 static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 {
 {
 	struct ablkcipher_edesc *edesc;
 	struct ablkcipher_edesc *edesc;
@@ -1327,6 +1658,61 @@ static struct caam_alg_template driver_algs[] = {
 };
 };
 
 
 static struct caam_aead_alg driver_aeads[] = {
 static struct caam_aead_alg driver_aeads[] = {
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4106(gcm(aes))",
+				.cra_driver_name = "rfc4106-gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4106_setkey,
+			.setauthsize = rfc4106_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4543(gcm(aes))",
+				.cra_driver_name = "rfc4543-gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4543_setkey,
+			.setauthsize = rfc4543_setauthsize,
+			.encrypt = ipsec_gcm_encrypt,
+			.decrypt = ipsec_gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	/* Galois Counter Mode */
+	{
+		.aead = {
+			.base = {
+				.cra_name = "gcm(aes)",
+				.cra_driver_name = "gcm-aes-caam-qi",
+				.cra_blocksize = 1,
+			},
+			.setkey = gcm_setkey,
+			.setauthsize = gcm_setauthsize,
+			.encrypt = aead_encrypt,
+			.decrypt = aead_decrypt,
+			.ivsize = 12,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		}
+	},
 	/* single-pass ipsec_esp descriptor */
 	/* single-pass ipsec_esp descriptor */
 	{
 	{
 		.aead = {
 		.aead = {
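
As a usage note (not part of the diff itself), a hedged sketch of keying the "rfc4106(gcm(aes))" transform registered above: per rfc4106_setkey() the final four key bytes become the nonce salt, and ipsec_gcm_encrypt()/ipsec_gcm_decrypt() reject requests carrying fewer than 8 bytes of associated data. The function name, key bytes and sizes below are placeholders:

#include <linux/err.h>
#include <crypto/aead.h>

static int rfc4106_keying_example(void)
{
	struct crypto_aead *tfm;
	/* 16-byte AES-128 key followed by a 4-byte salt => keylen of 20 */
	static const u8 key_and_salt[20] = { 0 };	/* placeholder key material */
	int ret;

	tfm = crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_aead_setauthsize(tfm, 16);		/* full 16-byte ICV */
	if (!ret)
		ret = crypto_aead_setkey(tfm, key_and_salt, sizeof(key_and_salt));

	crypto_free_aead(tfm);
	return ret;
}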

+ 24 - 18
drivers/crypto/caam/ctrl.c

@@ -337,7 +337,8 @@ static int caam_remove(struct platform_device *pdev)
 
 
 	/* shut clocks off before finalizing shutdown */
 	/* shut clocks off before finalizing shutdown */
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
-	clk_disable_unprepare(ctrlpriv->caam_mem);
+	if (ctrlpriv->caam_mem)
+		clk_disable_unprepare(ctrlpriv->caam_mem);
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 	if (ctrlpriv->caam_emi_slow)
 	if (ctrlpriv->caam_emi_slow)
 		clk_disable_unprepare(ctrlpriv->caam_emi_slow);
 		clk_disable_unprepare(ctrlpriv->caam_emi_slow);
@@ -466,14 +467,17 @@ static int caam_probe(struct platform_device *pdev)
 	}
 	ctrlpriv->caam_ipg = clk;

-	clk = caam_drv_identify_clk(&pdev->dev, "mem");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_err(&pdev->dev,
-			"can't identify CAAM mem clk: %d\n", ret);
-		return ret;
+	if (!of_machine_is_compatible("fsl,imx7d") &&
+	    !of_machine_is_compatible("fsl,imx7s")) {
+		clk = caam_drv_identify_clk(&pdev->dev, "mem");
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
+			dev_err(&pdev->dev,
+				"can't identify CAAM mem clk: %d\n", ret);
+			return ret;
+		}
+		ctrlpriv->caam_mem = clk;
 	}
-	ctrlpriv->caam_mem = clk;

 	clk = caam_drv_identify_clk(&pdev->dev, "aclk");
 	if (IS_ERR(clk)) {
@@ -484,7 +488,9 @@ static int caam_probe(struct platform_device *pdev)
 	}
 	ctrlpriv->caam_aclk = clk;

-	if (!of_machine_is_compatible("fsl,imx6ul")) {
+	if (!of_machine_is_compatible("fsl,imx6ul") &&
+	    !of_machine_is_compatible("fsl,imx7d") &&
+	    !of_machine_is_compatible("fsl,imx7s")) {
 		clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
 		if (IS_ERR(clk)) {
 			ret = PTR_ERR(clk);
@@ -501,11 +507,13 @@ static int caam_probe(struct platform_device *pdev)
 		return ret;
 	}

-	ret = clk_prepare_enable(ctrlpriv->caam_mem);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
-			ret);
-		goto disable_caam_ipg;
+	if (ctrlpriv->caam_mem) {
+		ret = clk_prepare_enable(ctrlpriv->caam_mem);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
+				ret);
+			goto disable_caam_ipg;
+		}
 	}

 	ret = clk_prepare_enable(ctrlpriv->caam_aclk);
@@ -815,9 +823,6 @@ static int caam_probe(struct platform_device *pdev)
 	return 0;

 caam_remove:
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(ctrlpriv->dfs_root);
-#endif
 	caam_remove(pdev);
 	return ret;

@@ -829,7 +834,8 @@ disable_caam_emi_slow:
 disable_caam_aclk:
 	clk_disable_unprepare(ctrlpriv->caam_aclk);
 disable_caam_mem:
-	clk_disable_unprepare(ctrlpriv->caam_mem);
+	if (ctrlpriv->caam_mem)
+		clk_disable_unprepare(ctrlpriv->caam_mem);
 disable_caam_ipg:
 	clk_disable_unprepare(ctrlpriv->caam_ipg);
 	return ret;
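Taken together, these ctrl.c hunks treat the "mem" clock as optional: it is never requested on i.MX7D/S, ctrlpriv->caam_mem stays NULL there, and every enable/disable site is guarded against the NULL case. A minimal sketch of the same idiom in isolation, using plain devm_clk_get() in place of the driver's caam_drv_identify_clk() helper (function and variable names here are illustrative):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/of.h>

/* Hedged sketch of the optional-clock idiom: look the clock up only on
 * SoCs that have it, and let a NULL pointer mean "not present".
 */
static int enable_optional_mem_clk(struct device *dev, struct clk **mem_clk)
{
	*mem_clk = NULL;

	if (of_machine_is_compatible("fsl,imx7d") ||
	    of_machine_is_compatible("fsl,imx7s"))
		return 0;	/* these SoCs have no separate "mem" clock */

	*mem_clk = devm_clk_get(dev, "mem");
	if (IS_ERR(*mem_clk))
		return PTR_ERR(*mem_clk);

	return clk_prepare_enable(*mem_clk);
}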

+ 9 - 2
drivers/crypto/caam/qi.c

@@ -579,8 +579,15 @@ static enum qman_cb_dqrr_result caam_rsp_fq_dqrr_cb(struct qman_portal *p,

 	fd = &dqrr->fd;
 	status = be32_to_cpu(fd->status);
-	if (unlikely(status))
-		dev_err(qidev, "Error: %#x in CAAM response FD\n", status);
+	if (unlikely(status)) {
+		u32 ssrc = status & JRSTA_SSRC_MASK;
+		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
+
+		if (ssrc != JRSTA_SSRC_CCB_ERROR ||
+		    err_id != JRSTA_CCBERR_ERRID_ICVCHK)
+			dev_err(qidev, "Error: %#x in CAAM response FD\n",
+				status);
+	}

 	if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
 		dev_err(qidev, "Non-compound FD from CAAM\n");

+ 1 - 1
drivers/crypto/cavium/cpt/cptpf_main.c

@@ -436,7 +436,7 @@ static int cpt_device_init(struct cpt_device *cpt)

 	/* Reset the PF when probed first */
 	cpt_reset(cpt);
-	mdelay(100);
+	msleep(100);

 	/*Check BIST status*/
 	bist = (u64)cpt_check_bist_status(cpt);
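The cavium change replaces a 100 ms busy-wait with a sleep: mdelay() spins the CPU for the whole interval, whereas msleep() yields to the scheduler, which is the preferred choice for a long delay in process context such as this probe-time reset. A short hedged reminder of the usual rule of thumb:

#include <linux/delay.h>
#include <linux/types.h>

/* Hedged illustration of the delay choice made in this hunk. */
static void settle_after_reset(bool atomic_context)
{
	if (atomic_context)
		mdelay(100);	/* busy-waits; only acceptable when sleeping is not allowed */
	else
		msleep(100);	/* may sleep slightly longer than asked, which is fine here */
}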

+ 1 - 1
drivers/crypto/ccp/ccp-crypto-aes-cmac.c

@@ -46,7 +46,7 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
 	}

 	/* Update result area if supplied */
-	if (req->result)
+	if (req->result && rctx->final)
 		memcpy(req->result, rctx->iv, digest_size);

 e_free:
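The ccp fix makes the completion handler copy a digest into req->result only when the request that just finished was the final one; intermediate update() completions also pass through ccp_aes_cmac_complete() but carry partial state rather than a finished MAC. A hedged sketch of the caller-side flow this check protects, using the one-shot digest entry point (asynchronous completion and error paths trimmed):

#include <crypto/hash.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/scatterlist.h>

/* Hedged sketch: only the final/digest step is expected to deposit bytes
 * in req->result; plain updates merely advance the internal state.
 */
static int cmac_one_shot(struct crypto_ahash *tfm, struct scatterlist *sg,
			 unsigned int len, u8 *mac_out)
{
	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
	int err;

	if (!req)
		return -ENOMEM;

	ahash_request_set_crypt(req, sg, mac_out, len);
	err = crypto_ahash_digest(req);	/* init + update + final in one call */

	ahash_request_free(req);
	return err;
}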

Some files were not shown because too many files changed in this diff