Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 "Here is the crypto update for 4.2:

  API:

   - Convert RNG interface to new style.

   - New AEAD interface with one SG list for AD and plain/cipher text.
     All external AEAD users have been converted.

   - New asymmetric key interface (akcipher).

  Algorithms:

   - Chacha20, Poly1305 and RFC7539 support.

   - New RSA implementation.

   - Jitter RNG.

   - DRBG is now seeded with both /dev/random and Jitter RNG.  If kernel
     pool isn't ready then DRBG will be reseeded when it is.

   - DRBG is now the default crypto API RNG, replacing krng.

   - 842 compression (previously part of powerpc nx driver).

  Drivers:

   - Accelerated SHA-512 for arm64.

   - New Marvell CESA driver that supports DMA and more algorithms.

   - Updated powerpc nx 842 support.

   - Added support for SEC1 hardware to talitos"
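
For illustration, a minimal sketch (not code from this merge) of how a caller
drives the converted AEAD interface described above: the associated data and
the plain/cipher text sit in one scatterlist, and the AD length is declared
with aead_request_set_ad(). The "gcm(aes)" name, the AES-128 key and the
16-byte tag are assumptions made for the example; the IV is assumed to match
crypto_aead_ivsize().

	#include <crypto/aead.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	/* Illustrative only: encrypt in place.  buf layout:
	 * [assoclen bytes of AD][ptlen bytes of plaintext][16 bytes for tag] */
	static int demo_aead_encrypt(u8 *buf, unsigned int assoclen,
				     unsigned int ptlen, u8 *iv, const u8 *key)
	{
		struct crypto_aead *tfm;
		struct aead_request *req;
		struct scatterlist sg;
		int err;

		/* Request a synchronous transform so no completion is needed. */
		tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_aead_setkey(tfm, key, 16);	/* AES-128, assumed */
		if (!err)
			err = crypto_aead_setauthsize(tfm, 16);
		if (err)
			goto out;

		req = aead_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out;
		}

		/* ONE SG list covering AD || plaintext || room for the tag. */
		sg_init_one(&sg, buf, assoclen + ptlen + 16);
		aead_request_set_ad(req, assoclen);
		aead_request_set_crypt(req, &sg, &sg, ptlen, iv);

		err = crypto_aead_encrypt(req);
		aead_request_free(req);
	out:
		crypto_free_aead(tfm);
		return err;
	}

The same call sequence would apply to the other AEADs added here, e.g. an
"rfc7539(chacha20,poly1305)" transform allocated through the same name lookup.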

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (292 commits)
  crypto: marvell/cesa - remove COMPILE_TEST dependency
  crypto: algif_aead - Temporarily disable all AEAD algorithms
  crypto: af_alg - Forbid the use internal algorithms
  crypto: echainiv - Only hold RNG during initialisation
  crypto: seqiv - Add compatibility support without RNG
  crypto: eseqiv - Offer normal cipher functionality without RNG
  crypto: chainiv - Offer normal cipher functionality without RNG
  crypto: user - Add CRYPTO_MSG_DELRNG
  crypto: user - Move cryptouser.h to uapi
  crypto: rng - Do not free default RNG when it becomes unused
  crypto: skcipher - Allow givencrypt to be NULL
  crypto: sahara - propagate the error on clk_disable_unprepare() failure
  crypto: rsa - fix invalid select for AKCIPHER
  crypto: picoxcell - Update to the current clk API
  crypto: nx - Check for bogus firmware properties
  crypto: marvell/cesa - add DT bindings documentation
  crypto: marvell/cesa - add support for Kirkwood and Dove SoCs
  crypto: marvell/cesa - add support for Orion SoCs
  crypto: marvell/cesa - add allhwsupport module parameter
  crypto: marvell/cesa - add support for all armada SoCs
  ...
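
As a companion to the RNG notes in the changelog above, a hedged sketch of the
new-style RNG interface; after this merge "stdrng" resolves to the DRBG rather
than the removed krng. The helper name and error handling are illustrative only:

	#include <crypto/rng.h>
	#include <linux/err.h>

	/* Fill buf with len bytes from the default crypto API RNG. */
	static int demo_fill_random(u8 *buf, unsigned int len)
	{
		struct crypto_rng *rng;
		int err;

		rng = crypto_alloc_rng("stdrng", 0, 0);
		if (IS_ERR(rng))
			return PTR_ERR(rng);

		/* A NULL seed makes the API pull the seed bytes from
		 * get_random_bytes() before reseeding the instance. */
		err = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
		if (!err)
			err = crypto_rng_get_bytes(rng, buf, len);

		crypto_free_rng(rng);
		return err;
	}
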
Linus Torvalds 10 years ago
parent
commit
44d21c3f3a
100 changed files with 17776 additions and 3822 deletions
+39 -28  Documentation/DocBook/crypto-API.tmpl
+4 -2  Documentation/devicetree/bindings/crypto/fsl-sec2.txt
+45 -0  Documentation/devicetree/bindings/crypto/marvell-cesa.txt
+22 -9  Documentation/devicetree/bindings/crypto/mv_cesa.txt
+13 -3  MAINTAINERS
+4 -11  arch/arm/crypto/Kconfig
+7 -3  arch/arm/crypto/Makefile
+3 -4  arch/arm/crypto/aes-ce-core.S
+649 -0  arch/arm/crypto/sha512-armv4.pl
+0 -455  arch/arm/crypto/sha512-armv7-neon.S
+1861 -0  arch/arm/crypto/sha512-core.S_shipped
+121 -0  arch/arm/crypto/sha512-glue.c
+98 -0  arch/arm/crypto/sha512-neon-glue.c
+8 -0  arch/arm/crypto/sha512.h
+0 -305  arch/arm/crypto/sha512_neon_glue.c
+1 -1  arch/arm64/crypto/aes-ce-ccm-glue.c
+4 -4  arch/mips/cavium-octeon/crypto/octeon-md5.c
+2 -0  arch/nios2/kernel/time.c
+4 -4  arch/powerpc/crypto/md5-glue.c
+184 -0  arch/powerpc/include/asm/icswx.h
+13 -0  arch/powerpc/include/asm/ppc-opcode.h
+1 -0  arch/powerpc/kernel/prom.c
+4 -4  arch/sparc/crypto/md5_glue.c
+167 -256  arch/x86/crypto/aesni-intel_glue.c
+1 -1  arch/x86/crypto/fpu.c
+2 -1  arch/x86/crypto/sha-mb/sha1_mb.c
+37 -137  crypto/842.c
+95 -11  crypto/Kconfig
+14 -1  crypto/Makefile
+10 -2  crypto/ablkcipher.c
+530 -154  crypto/aead.c
+4 -1  crypto/af_alg.c
+117 -0  crypto/akcipher.c
+26 -5  crypto/algapi.c
+11 -59  crypto/algif_aead.c
+1 -1  crypto/algif_rng.c
+41 -47  crypto/ansi_cprng.c
+9 -8  crypto/authenc.c
+9 -8  crypto/authencesn.c
+1 -0  crypto/blkcipher.c
+7 -7  crypto/ccm.c
+216 -0  crypto/chacha20_generic.c
+695 -0  crypto/chacha20poly1305.c
+30 -75  crypto/chainiv.c
+81 -54  crypto/cryptd.c
+39 -0  crypto/crypto_null.c
+33 -1  crypto/crypto_user.c
+298 -269  crypto/drbg.c
+312 -0  crypto/echainiv.c
+12 -40  crypto/eseqiv.c
+52 -1  crypto/fips.c
+385 -555  crypto/gcm.c
+2 -1  crypto/internal.h
+928 -0  crypto/jitterentropy.c
+0 -66  crypto/krng.c
+4 -4  crypto/md5.c
+1 -6  crypto/pcompress.c
+72 -127  crypto/pcrypt.c
+321 -0  crypto/poly1305_generic.c
+0 -41  crypto/proc.c
+107 -25  crypto/rng.c
+315 -0  crypto/rsa.c
+121 -0  crypto/rsa_helper.c
+5 -0  crypto/rsakey.asn1
+34 -11  crypto/scatterwalk.c
+507 -80  crypto/seqiv.c
+1 -6  crypto/shash.c
+21 -15  crypto/tcrypt.c
+1 -0  crypto/tcrypt.h
+275 -39  crypto/testmgr.c
+2429 -14  crypto/testmgr.h
+2 -2  crypto/zlib.c
+117 -0  drivers/bus/mvebu-mbus.c
+79 -1  drivers/char/random.c
+54 -33  drivers/crypto/Kconfig
+1 -0  drivers/crypto/Makefile
+2 -3  drivers/crypto/caam/Kconfig
+771 -726  drivers/crypto/caam/caamalg.c
+5 -4  drivers/crypto/caam/caamhash.c
+1 -1  drivers/crypto/caam/compat.h
+2 -2  drivers/crypto/caam/ctrl.c
+19 -19  drivers/crypto/caam/regs.h
+36 -14  drivers/crypto/caam/sg_sw_sec4.h
+0 -1  drivers/crypto/ccp/Kconfig
+5 -4  drivers/crypto/ccp/ccp-ops.c
+0 -2  drivers/crypto/ccp/ccp-platform.c
+5 -4  drivers/crypto/ixp4xx_crypto.c
+2 -0  drivers/crypto/marvell/Makefile
+548 -0  drivers/crypto/marvell/cesa.c
+791 -0  drivers/crypto/marvell/cesa.h
+797 -0  drivers/crypto/marvell/cipher.c
+1441 -0  drivers/crypto/marvell/hash.c
+224 -0  drivers/crypto/marvell/tdma.c
+50 -23  drivers/crypto/mv_cesa.c
+4 -4  drivers/crypto/n2_core.c
+45 -16  drivers/crypto/nx/Kconfig
+8 -1  drivers/crypto/nx/Makefile
+580 -0  drivers/crypto/nx/nx-842-crypto.c
+84 -0  drivers/crypto/nx/nx-842-platform.c
+637 -0  drivers/crypto/nx/nx-842-powernv.c

+ 39 - 28
Documentation/DocBook/crypto-API.tmpl

@@ -119,7 +119,7 @@
 
     <para>
      Note: The terms "transformation" and cipher algorithm are used
-     interchangably.
+     interchangeably.
     </para>
    </sect1>
 
@@ -536,8 +536,8 @@
 
      <para>
       For other use cases of AEAD ciphers, the ASCII art applies as
-      well, but the caller may not use the GIVCIPHER interface. In
-      this case, the caller must generate the IV.
+      well, but the caller may not use the AEAD cipher with a separate
+      IV generator. In this case, the caller must generate the IV.
      </para>
 
      <para>
@@ -584,8 +584,8 @@ kernel crypto API                                |   IPSEC Layer
                                                  |
 +-----------+                                    |
 |           |            (1)
-| givcipher | <-----------------------------------  esp_output
-|  (seqiv)  | ---+
+|   aead    | <-----------------------------------  esp_output
+| (seqniv)  | ---+
 +-----------+    |
                  | (2)
 +-----------+    |
@@ -620,8 +620,8 @@ kernel crypto API                                |   IPSEC Layer
      <orderedlist>
       <listitem>
        <para>
-        esp_output() invokes crypto_aead_givencrypt() to trigger an encryption
-        operation of the GIVCIPHER implementation.
+        esp_output() invokes crypto_aead_encrypt() to trigger an encryption
+        operation of the AEAD cipher with IV generator.
        </para>
 
        <para>
@@ -1563,7 +1563,7 @@ struct sockaddr_alg sa = {
 
    <sect1><title>Zero-Copy Interface</title>
     <para>
-     In addition to the send/write/read/recv system call familty, the AF_ALG
+     In addition to the send/write/read/recv system call family, the AF_ALG
      interface can be accessed with the zero-copy interface of splice/vmsplice.
      As the name indicates, the kernel tries to avoid a copy operation into
      kernel space.
@@ -1669,9 +1669,19 @@ read(opfd, out, outlen);
   </chapter>
 
   <chapter id="API"><title>Programming Interface</title>
+   <para>
+    Please note that the kernel crypto API contains the AEAD givcrypt
+    API (crypto_aead_giv* and aead_givcrypt_* function calls in
+    include/crypto/aead.h). This API is obsolete and will be removed
+    in the future. To obtain the functionality of an AEAD cipher with
+    internal IV generation, use the IV generator as a regular cipher.
+    For example, rfc4106(gcm(aes)) is the AEAD cipher with external
+    IV generation and seqniv(rfc4106(gcm(aes))) implies that the kernel
+    crypto API generates the IV. Different IV generators are available.
+   </para>
    <sect1><title>Block Cipher Context Data Structures</title>
 !Pinclude/linux/crypto.h Block Cipher Context Data Structures
-!Finclude/linux/crypto.h aead_request
+!Finclude/crypto/aead.h aead_request
    </sect1>
    <sect1><title>Block Cipher Algorithm Definitions</title>
 !Pinclude/linux/crypto.h Block Cipher Algorithm Definitions
@@ -1680,7 +1690,7 @@ read(opfd, out, outlen);
 !Finclude/linux/crypto.h aead_alg
 !Finclude/linux/crypto.h blkcipher_alg
 !Finclude/linux/crypto.h cipher_alg
-!Finclude/linux/crypto.h rng_alg
+!Finclude/crypto/rng.h rng_alg
    </sect1>
    <sect1><title>Asynchronous Block Cipher API</title>
 !Pinclude/linux/crypto.h Asynchronous Block Cipher API
@@ -1704,26 +1714,27 @@ read(opfd, out, outlen);
 !Finclude/linux/crypto.h ablkcipher_request_set_crypt
    </sect1>
    <sect1><title>Authenticated Encryption With Associated Data (AEAD) Cipher API</title>
-!Pinclude/linux/crypto.h Authenticated Encryption With Associated Data (AEAD) Cipher API
-!Finclude/linux/crypto.h crypto_alloc_aead
-!Finclude/linux/crypto.h crypto_free_aead
-!Finclude/linux/crypto.h crypto_aead_ivsize
-!Finclude/linux/crypto.h crypto_aead_authsize
-!Finclude/linux/crypto.h crypto_aead_blocksize
-!Finclude/linux/crypto.h crypto_aead_setkey
-!Finclude/linux/crypto.h crypto_aead_setauthsize
-!Finclude/linux/crypto.h crypto_aead_encrypt
-!Finclude/linux/crypto.h crypto_aead_decrypt
+!Pinclude/crypto/aead.h Authenticated Encryption With Associated Data (AEAD) Cipher API
+!Finclude/crypto/aead.h crypto_alloc_aead
+!Finclude/crypto/aead.h crypto_free_aead
+!Finclude/crypto/aead.h crypto_aead_ivsize
+!Finclude/crypto/aead.h crypto_aead_authsize
+!Finclude/crypto/aead.h crypto_aead_blocksize
+!Finclude/crypto/aead.h crypto_aead_setkey
+!Finclude/crypto/aead.h crypto_aead_setauthsize
+!Finclude/crypto/aead.h crypto_aead_encrypt
+!Finclude/crypto/aead.h crypto_aead_decrypt
    </sect1>
    <sect1><title>Asynchronous AEAD Request Handle</title>
-!Pinclude/linux/crypto.h Asynchronous AEAD Request Handle
-!Finclude/linux/crypto.h crypto_aead_reqsize
-!Finclude/linux/crypto.h aead_request_set_tfm
-!Finclude/linux/crypto.h aead_request_alloc
-!Finclude/linux/crypto.h aead_request_free
-!Finclude/linux/crypto.h aead_request_set_callback
-!Finclude/linux/crypto.h aead_request_set_crypt
-!Finclude/linux/crypto.h aead_request_set_assoc
+!Pinclude/crypto/aead.h Asynchronous AEAD Request Handle
+!Finclude/crypto/aead.h crypto_aead_reqsize
+!Finclude/crypto/aead.h aead_request_set_tfm
+!Finclude/crypto/aead.h aead_request_alloc
+!Finclude/crypto/aead.h aead_request_free
+!Finclude/crypto/aead.h aead_request_set_callback
+!Finclude/crypto/aead.h aead_request_set_crypt
+!Finclude/crypto/aead.h aead_request_set_assoc
+!Finclude/crypto/aead.h aead_request_set_ad
    </sect1>
    <sect1><title>Synchronous Block Cipher API</title>
 !Pinclude/linux/crypto.h Synchronous Block Cipher API
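
To make the paragraph added above on the obsolete givcrypt API concrete, a
small hedged sketch of the distinction it draws; the algorithm names follow
the documentation text, the rest is illustrative:

	#include <crypto/aead.h>
	#include <linux/err.h>

	static void demo_iv_generators(void)
	{
		/* External IV generation: the caller supplies the IV itself. */
		struct crypto_aead *aead =
			crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);

		/* Internal IV generation: wrapping the same cipher in the
		 * seqniv template makes the kernel crypto API create the IV. */
		struct crypto_aead *giv =
			crypto_alloc_aead("seqniv(rfc4106(gcm(aes)))", 0, 0);

		if (!IS_ERR(aead))
			crypto_free_aead(aead);
		if (!IS_ERR(giv))
			crypto_free_aead(giv);
	}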

+ 4 - 2
Documentation/devicetree/bindings/crypto/fsl-sec2.txt

@@ -1,9 +1,11 @@
-Freescale SoC SEC Security Engines versions 2.x-3.x
+Freescale SoC SEC Security Engines versions 1.x-2.x-3.x
 
 Required properties:
 
 - compatible : Should contain entries for this and backward compatible
-  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0"
+  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3)
+                             e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1)
+    warning: SEC1 and SEC2 are mutually exclusive
 - reg : Offset and length of the register set for the device
 - interrupts : the SEC's interrupt number
 - fsl,num-channels : An integer representing the number of channels

+ 45 - 0
Documentation/devicetree/bindings/crypto/marvell-cesa.txt

@@ -0,0 +1,45 @@
+Marvell Cryptographic Engines And Security Accelerator
+
+Required properties:
+- compatible: should be one of the following string
+	      "marvell,orion-crypto"
+	      "marvell,kirkwood-crypto"
+	      "marvell,dove-crypto"
+	      "marvell,armada-370-crypto"
+	      "marvell,armada-xp-crypto"
+	      "marvell,armada-375-crypto"
+	      "marvell,armada-38x-crypto"
+- reg: base physical address of the engine and length of memory mapped
+       region. Can also contain an entry for the SRAM attached to the CESA,
+       but this representation is deprecated and marvell,crypto-srams should
+       be used instead
+- reg-names: "regs". Can contain an "sram" entry, but this representation
+	     is deprecated and marvell,crypto-srams should be used instead
+- interrupts: interrupt number
+- clocks: reference to the crypto engines clocks. This property is not
+	  required for orion and kirkwood platforms
+- clock-names: "cesaX" and "cesazX", X should be replaced by the crypto engine
+	       id.
+	       This property is not required for the orion and kirkwoord
+	       platforms.
+	       "cesazX" clocks are not required on armada-370 platforms
+- marvell,crypto-srams: phandle to crypto SRAM definitions
+
+Optional properties:
+- marvell,crypto-sram-size: SRAM size reserved for crypto operations, if not
+			    specified the whole SRAM is used (2KB)
+
+
+Examples:
+
+	crypto@90000 {
+		compatible = "marvell,armada-xp-crypto";
+		reg = <0x90000 0x10000>;
+		reg-names = "regs";
+		interrupts = <48>, <49>;
+		clocks = <&gateclk 23>, <&gateclk 23>;
+		clock-names = "cesa0", "cesa1";
+		marvell,crypto-srams = <&crypto_sram0>, <&crypto_sram1>;
+		marvell,crypto-sram-size = <0x600>;
+		status = "okay";
+	};

+ 22 - 9
Documentation/devicetree/bindings/crypto/mv_cesa.txt

@@ -1,20 +1,33 @@
 Marvell Cryptographic Engines And Security Accelerator
 
 Required properties:
-- compatible : should be "marvell,orion-crypto"
-- reg : base physical address of the engine and length of memory mapped
-        region, followed by base physical address of sram and its memory
-        length
-- reg-names : "regs" , "sram";
-- interrupts : interrupt number
+- compatible: should be one of the following string
+	      "marvell,orion-crypto"
+	      "marvell,kirkwood-crypto"
+	      "marvell,dove-crypto"
+- reg: base physical address of the engine and length of memory mapped
+       region. Can also contain an entry for the SRAM attached to the CESA,
+       but this representation is deprecated and marvell,crypto-srams should
+       be used instead
+- reg-names: "regs". Can contain an "sram" entry, but this representation
+	     is deprecated and marvell,crypto-srams should be used instead
+- interrupts: interrupt number
+- clocks: reference to the crypto engines clocks. This property is only
+	  required for Dove platforms
+- marvell,crypto-srams: phandle to crypto SRAM definitions
+
+Optional properties:
+- marvell,crypto-sram-size: SRAM size reserved for crypto operations, if not
+			    specified the whole SRAM is used (2KB)
 
 Examples:
 
 	crypto@30000 {
 		compatible = "marvell,orion-crypto";
-		reg = <0x30000 0x10000>,
-		      <0x4000000 0x800>;
-		reg-names = "regs" , "sram";
+		reg = <0x30000 0x10000>;
+		reg-names = "regs";
 		interrupts = <22>;
+		marvell,crypto-srams = <&crypto_sram>;
+		marvell,crypto-sram-size = <0x600>;
 		status = "okay";
 	};

+ 13 - 3
MAINTAINERS

@@ -4879,13 +4879,23 @@ M:	Marcelo Henrique Cerri <mhcerri@linux.vnet.ibm.com>
 M:	Fionnuala Gunter <fin@linux.vnet.ibm.com>
 L:	linux-crypto@vger.kernel.org
 S:	Supported
-F:	drivers/crypto/nx/
+F:	drivers/crypto/nx/Makefile
+F:	drivers/crypto/nx/Kconfig
+F:	drivers/crypto/nx/nx-aes*
+F:	drivers/crypto/nx/nx-sha*
+F:	drivers/crypto/nx/nx.*
+F:	drivers/crypto/nx/nx_csbcpb.h
+F:	drivers/crypto/nx/nx_debugfs.h
 
 IBM Power 842 compression accelerator
 M:	Dan Streetman <ddstreet@us.ibm.com>
 S:	Supported
-F:	drivers/crypto/nx/nx-842.c
-F:	include/linux/nx842.h
+F:	drivers/crypto/nx/Makefile
+F:	drivers/crypto/nx/Kconfig
+F:	drivers/crypto/nx/nx-842*
+F:	include/linux/sw842.h
+F:	crypto/842.c
+F:	lib/842/
 
 IBM Power Linux RAID adapter
 M:	Brian King <brking@us.ibm.com>

+ 4 - 11
arch/arm/crypto/Kconfig

@@ -53,20 +53,13 @@ config CRYPTO_SHA256_ARM
 	  SHA-256 secure hash standard (DFIPS 180-2) implemented
 	  using optimized ARM assembler and NEON, when available.
 
-config CRYPTO_SHA512_ARM_NEON
-	tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_SHA512
+config CRYPTO_SHA512_ARM
+	tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)"
 	select CRYPTO_HASH
+	depends on !CPU_V7M
 	help
 	  SHA-512 secure hash standard (DFIPS 180-2) implemented
-	  using ARM NEON instructions, when available.
-
-	  This version of SHA implements a 512 bit hash with 256 bits of
-	  security against collision attacks.
-
-	  This code also includes SHA-384, a 384 bit hash with 192 bits
-	  of security against collision attacks.
+	  using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_AES_ARM
 	tristate "AES cipher algorithms (ARM-asm)"

+ 7 - 3
arch/arm/crypto/Makefile

@@ -7,7 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
-obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -30,7 +30,8 @@ sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
 sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
 sha256-arm-y	:= sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
-sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
+sha512-arm-y	:= sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
 sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
 sha2-arm-ce-y	:= sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
@@ -45,4 +46,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
 $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 	$(call cmd,perl)
 
-.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
+$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
+	$(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S

+ 3 - 4
arch/arm/crypto/aes-ce-core.S

@@ -101,15 +101,14 @@
 	\dround		q10, q11
 	blo		0f			@ AES-128: 10 rounds
 	vld1.8		{q10-q11}, [ip]!
-	beq		1f			@ AES-192: 12 rounds
 	\dround		q12, q13
+	beq		1f			@ AES-192: 12 rounds
 	vld1.8		{q12-q13}, [ip]
 	\dround		q10, q11
 0:	\fround		q12, q13, q14
 	bx		lr
 
-1:	\dround		q12, q13
-	\fround		q10, q11, q14
+1:	\fround		q10, q11, q14
 	bx		lr
 	.endm
 
@@ -122,8 +121,8 @@
 	 *   q2        : third in/output block (_3x version only)
 	 *   q8        : first round key
 	 *   q9        : secound round key
-	 *   ip        : address of 3rd round key
 	 *   q14       : final round key
+	 *   r2        : address of round key array
 	 *   r3        : number of rounds
 	 */
 	.align		6

+ 649 - 0
arch/arm/crypto/sha512-armv4.pl

@@ -0,0 +1,649 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA512 block procedure for ARMv4. September 2007.
+
+# This code is ~4.5 (four and a half) times faster than code generated
+# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+# Xscale PXA250 core].
+#
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 6% improvement on
+# Cortex A8 core and ~40 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 7%
+# improvement on Coxtex A8 core and ~38 cycles per byte.
+
+# March 2011.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process
+# one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+# August 2012.
+#
+# Improve NEON performance by 12% on Snapdragon S4. In absolute
+# terms it's 22.6 cycles per byte, which is disappointing result.
+# Technical writers asserted that 3-way S4 pipeline can sustain
+# multiple NEON instructions per cycle, but dual NEON issue could
+# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+# for further details. On side note Cortex-A15 processes one byte in
+# 16 cycles.
+
+# Byte order [in]dependence. =========================================
+#
+# Originally caller was expected to maintain specific *dword* order in
+# h[0-7], namely with most significant dword at *lower* address, which
+# was reflected in below two parameters as 0 and 4. Now caller is
+# expected to maintain native byte order for whole 64-bit values.
+$hi="HI";
+$lo="LO";
+# ====================================================================
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0";	# parameter block
+$inp="r1";
+$len="r2";
+
+$Tlo="r3";
+$Thi="r4";
+$Alo="r5";
+$Ahi="r6";
+$Elo="r7";
+$Ehi="r8";
+$t0="r9";
+$t1="r10";
+$t2="r11";
+$t3="r12";
+############	r13 is stack pointer
+$Ktbl="r14";
+############	r15 is program counter
+
+$Aoff=8*0;
+$Boff=8*1;
+$Coff=8*2;
+$Doff=8*3;
+$Eoff=8*4;
+$Foff=8*5;
+$Goff=8*6;
+$Hoff=8*7;
+$Xoff=8*8;
+
+sub BODY_00_15() {
+my $magic = shift;
+$code.=<<___;
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	$t0,$Elo,lsr#14
+	str	$Tlo,[sp,#$Xoff+0]
+	mov	$t1,$Ehi,lsr#14
+	str	$Thi,[sp,#$Xoff+4]
+	eor	$t0,$t0,$Ehi,lsl#18
+	ldr	$t2,[sp,#$Hoff+0]	@ h.lo
+	eor	$t1,$t1,$Elo,lsl#18
+	ldr	$t3,[sp,#$Hoff+4]	@ h.hi
+	eor	$t0,$t0,$Elo,lsr#18
+	eor	$t1,$t1,$Ehi,lsr#18
+	eor	$t0,$t0,$Ehi,lsl#14
+	eor	$t1,$t1,$Elo,lsl#14
+	eor	$t0,$t0,$Ehi,lsr#9
+	eor	$t1,$t1,$Elo,lsr#9
+	eor	$t0,$t0,$Elo,lsl#23
+	eor	$t1,$t1,$Ehi,lsl#23	@ Sigma1(e)
+	adds	$Tlo,$Tlo,$t0
+	ldr	$t0,[sp,#$Foff+0]	@ f.lo
+	adc	$Thi,$Thi,$t1		@ T += Sigma1(e)
+	ldr	$t1,[sp,#$Foff+4]	@ f.hi
+	adds	$Tlo,$Tlo,$t2
+	ldr	$t2,[sp,#$Goff+0]	@ g.lo
+	adc	$Thi,$Thi,$t3		@ T += h
+	ldr	$t3,[sp,#$Goff+4]	@ g.hi
+
+	eor	$t0,$t0,$t2
+	str	$Elo,[sp,#$Eoff+0]
+	eor	$t1,$t1,$t3
+	str	$Ehi,[sp,#$Eoff+4]
+	and	$t0,$t0,$Elo
+	str	$Alo,[sp,#$Aoff+0]
+	and	$t1,$t1,$Ehi
+	str	$Ahi,[sp,#$Aoff+4]
+	eor	$t0,$t0,$t2
+	ldr	$t2,[$Ktbl,#$lo]	@ K[i].lo
+	eor	$t1,$t1,$t3		@ Ch(e,f,g)
+	ldr	$t3,[$Ktbl,#$hi]	@ K[i].hi
+
+	adds	$Tlo,$Tlo,$t0
+	ldr	$Elo,[sp,#$Doff+0]	@ d.lo
+	adc	$Thi,$Thi,$t1		@ T += Ch(e,f,g)
+	ldr	$Ehi,[sp,#$Doff+4]	@ d.hi
+	adds	$Tlo,$Tlo,$t2
+	and	$t0,$t2,#0xff
+	adc	$Thi,$Thi,$t3		@ T += K[i]
+	adds	$Elo,$Elo,$Tlo
+	ldr	$t2,[sp,#$Boff+0]	@ b.lo
+	adc	$Ehi,$Ehi,$Thi		@ d += T
+	teq	$t0,#$magic
+
+	ldr	$t3,[sp,#$Coff+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	orreq	$Ktbl,$Ktbl,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	$t0,$Alo,lsr#28
+	mov	$t1,$Ahi,lsr#28
+	eor	$t0,$t0,$Ahi,lsl#4
+	eor	$t1,$t1,$Alo,lsl#4
+	eor	$t0,$t0,$Ahi,lsr#2
+	eor	$t1,$t1,$Alo,lsr#2
+	eor	$t0,$t0,$Alo,lsl#30
+	eor	$t1,$t1,$Ahi,lsl#30
+	eor	$t0,$t0,$Ahi,lsr#7
+	eor	$t1,$t1,$Alo,lsr#7
+	eor	$t0,$t0,$Alo,lsl#25
+	eor	$t1,$t1,$Ahi,lsl#25	@ Sigma0(a)
+	adds	$Tlo,$Tlo,$t0
+	and	$t0,$Alo,$t2
+	adc	$Thi,$Thi,$t1		@ T += Sigma0(a)
+
+	ldr	$t1,[sp,#$Boff+4]	@ b.hi
+	orr	$Alo,$Alo,$t2
+	ldr	$t2,[sp,#$Coff+4]	@ c.hi
+	and	$Alo,$Alo,$t3
+	and	$t3,$Ahi,$t1
+	orr	$Ahi,$Ahi,$t1
+	orr	$Alo,$Alo,$t0		@ Maj(a,b,c).lo
+	and	$Ahi,$Ahi,$t2
+	adds	$Alo,$Alo,$Tlo
+	orr	$Ahi,$Ahi,$t3		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	$Ahi,$Ahi,$Thi		@ h += T
+	tst	$Ktbl,#1
+	add	$Ktbl,$Ktbl,#8
+___
+}
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
+# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code	32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
+.type	K512,%object
+.align	5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size	K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha512_block_data_order
+.skip	32-4
+#else
+.skip	32
+#endif
+
+.global	sha512_block_data_order
+.type	sha512_block_data_order,%function
+sha512_block_data_order:
+#if __ARM_ARCH__<7
+	sub	r3,pc,#8		@ sha512_block_data_order
+#else
+	adr	r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#1
+	bne	.LNEON
+#endif
+	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
+	stmdb	sp!,{r4-r12,lr}
+	sub	$Ktbl,r3,#672		@ K512
+	sub	sp,sp,#9*8
+
+	ldr	$Elo,[$ctx,#$Eoff+$lo]
+	ldr	$Ehi,[$ctx,#$Eoff+$hi]
+	ldr	$t0, [$ctx,#$Goff+$lo]
+	ldr	$t1, [$ctx,#$Goff+$hi]
+	ldr	$t2, [$ctx,#$Hoff+$lo]
+	ldr	$t3, [$ctx,#$Hoff+$hi]
+.Loop:
+	str	$t0, [sp,#$Goff+0]
+	str	$t1, [sp,#$Goff+4]
+	str	$t2, [sp,#$Hoff+0]
+	str	$t3, [sp,#$Hoff+4]
+	ldr	$Alo,[$ctx,#$Aoff+$lo]
+	ldr	$Ahi,[$ctx,#$Aoff+$hi]
+	ldr	$Tlo,[$ctx,#$Boff+$lo]
+	ldr	$Thi,[$ctx,#$Boff+$hi]
+	ldr	$t0, [$ctx,#$Coff+$lo]
+	ldr	$t1, [$ctx,#$Coff+$hi]
+	ldr	$t2, [$ctx,#$Doff+$lo]
+	ldr	$t3, [$ctx,#$Doff+$hi]
+	str	$Tlo,[sp,#$Boff+0]
+	str	$Thi,[sp,#$Boff+4]
+	str	$t0, [sp,#$Coff+0]
+	str	$t1, [sp,#$Coff+4]
+	str	$t2, [sp,#$Doff+0]
+	str	$t3, [sp,#$Doff+4]
+	ldr	$Tlo,[$ctx,#$Foff+$lo]
+	ldr	$Thi,[$ctx,#$Foff+$hi]
+	str	$Tlo,[sp,#$Foff+0]
+	str	$Thi,[sp,#$Foff+4]
+
+.L00_15:
+#if __ARM_ARCH__<7
+	ldrb	$Tlo,[$inp,#7]
+	ldrb	$t0, [$inp,#6]
+	ldrb	$t1, [$inp,#5]
+	ldrb	$t2, [$inp,#4]
+	ldrb	$Thi,[$inp,#3]
+	ldrb	$t3, [$inp,#2]
+	orr	$Tlo,$Tlo,$t0,lsl#8
+	ldrb	$t0, [$inp,#1]
+	orr	$Tlo,$Tlo,$t1,lsl#16
+	ldrb	$t1, [$inp],#8
+	orr	$Tlo,$Tlo,$t2,lsl#24
+	orr	$Thi,$Thi,$t3,lsl#8
+	orr	$Thi,$Thi,$t0,lsl#16
+	orr	$Thi,$Thi,$t1,lsl#24
+#else
+	ldr	$Tlo,[$inp,#4]
+	ldr	$Thi,[$inp],#8
+#ifdef __ARMEL__
+	rev	$Tlo,$Tlo
+	rev	$Thi,$Thi
+#endif
+#endif
+___
+	&BODY_00_15(0x94);
+$code.=<<___;
+	tst	$Ktbl,#1
+	beq	.L00_15
+	ldr	$t0,[sp,#`$Xoff+8*(16-1)`+0]
+	ldr	$t1,[sp,#`$Xoff+8*(16-1)`+4]
+	bic	$Ktbl,$Ktbl,#1
+.L16_79:
+	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
+	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
+	mov	$Tlo,$t0,lsr#1
+	ldr	$t2,[sp,#`$Xoff+8*(16-14)`+0]
+	mov	$Thi,$t1,lsr#1
+	ldr	$t3,[sp,#`$Xoff+8*(16-14)`+4]
+	eor	$Tlo,$Tlo,$t1,lsl#31
+	eor	$Thi,$Thi,$t0,lsl#31
+	eor	$Tlo,$Tlo,$t0,lsr#8
+	eor	$Thi,$Thi,$t1,lsr#8
+	eor	$Tlo,$Tlo,$t1,lsl#24
+	eor	$Thi,$Thi,$t0,lsl#24
+	eor	$Tlo,$Tlo,$t0,lsr#7
+	eor	$Thi,$Thi,$t1,lsr#7
+	eor	$Tlo,$Tlo,$t1,lsl#25
+
+	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+	mov	$t0,$t2,lsr#19
+	mov	$t1,$t3,lsr#19
+	eor	$t0,$t0,$t3,lsl#13
+	eor	$t1,$t1,$t2,lsl#13
+	eor	$t0,$t0,$t3,lsr#29
+	eor	$t1,$t1,$t2,lsr#29
+	eor	$t0,$t0,$t2,lsl#3
+	eor	$t1,$t1,$t3,lsl#3
+	eor	$t0,$t0,$t2,lsr#6
+	eor	$t1,$t1,$t3,lsr#6
+	ldr	$t2,[sp,#`$Xoff+8*(16-9)`+0]
+	eor	$t0,$t0,$t3,lsl#26
+
+	ldr	$t3,[sp,#`$Xoff+8*(16-9)`+4]
+	adds	$Tlo,$Tlo,$t0
+	ldr	$t0,[sp,#`$Xoff+8*16`+0]
+	adc	$Thi,$Thi,$t1
+
+	ldr	$t1,[sp,#`$Xoff+8*16`+4]
+	adds	$Tlo,$Tlo,$t2
+	adc	$Thi,$Thi,$t3
+	adds	$Tlo,$Tlo,$t0
+	adc	$Thi,$Thi,$t1
+___
+	&BODY_00_15(0x17);
+$code.=<<___;
+#if __ARM_ARCH__>=7
+	ittt	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	ldreq	$t0,[sp,#`$Xoff+8*(16-1)`+0]
+	ldreq	$t1,[sp,#`$Xoff+8*(16-1)`+4]
+	beq	.L16_79
+	bic	$Ktbl,$Ktbl,#1
+
+	ldr	$Tlo,[sp,#$Boff+0]
+	ldr	$Thi,[sp,#$Boff+4]
+	ldr	$t0, [$ctx,#$Aoff+$lo]
+	ldr	$t1, [$ctx,#$Aoff+$hi]
+	ldr	$t2, [$ctx,#$Boff+$lo]
+	ldr	$t3, [$ctx,#$Boff+$hi]
+	adds	$t0,$Alo,$t0
+	str	$t0, [$ctx,#$Aoff+$lo]
+	adc	$t1,$Ahi,$t1
+	str	$t1, [$ctx,#$Aoff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Boff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Boff+$hi]
+
+	ldr	$Alo,[sp,#$Coff+0]
+	ldr	$Ahi,[sp,#$Coff+4]
+	ldr	$Tlo,[sp,#$Doff+0]
+	ldr	$Thi,[sp,#$Doff+4]
+	ldr	$t0, [$ctx,#$Coff+$lo]
+	ldr	$t1, [$ctx,#$Coff+$hi]
+	ldr	$t2, [$ctx,#$Doff+$lo]
+	ldr	$t3, [$ctx,#$Doff+$hi]
+	adds	$t0,$Alo,$t0
+	str	$t0, [$ctx,#$Coff+$lo]
+	adc	$t1,$Ahi,$t1
+	str	$t1, [$ctx,#$Coff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Doff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Doff+$hi]
+
+	ldr	$Tlo,[sp,#$Foff+0]
+	ldr	$Thi,[sp,#$Foff+4]
+	ldr	$t0, [$ctx,#$Eoff+$lo]
+	ldr	$t1, [$ctx,#$Eoff+$hi]
+	ldr	$t2, [$ctx,#$Foff+$lo]
+	ldr	$t3, [$ctx,#$Foff+$hi]
+	adds	$Elo,$Elo,$t0
+	str	$Elo,[$ctx,#$Eoff+$lo]
+	adc	$Ehi,$Ehi,$t1
+	str	$Ehi,[$ctx,#$Eoff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Foff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Foff+$hi]
+
+	ldr	$Alo,[sp,#$Goff+0]
+	ldr	$Ahi,[sp,#$Goff+4]
+	ldr	$Tlo,[sp,#$Hoff+0]
+	ldr	$Thi,[sp,#$Hoff+4]
+	ldr	$t0, [$ctx,#$Goff+$lo]
+	ldr	$t1, [$ctx,#$Goff+$hi]
+	ldr	$t2, [$ctx,#$Hoff+$lo]
+	ldr	$t3, [$ctx,#$Hoff+$hi]
+	adds	$t0,$Alo,$t0
+	str	$t0, [$ctx,#$Goff+$lo]
+	adc	$t1,$Ahi,$t1
+	str	$t1, [$ctx,#$Goff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Hoff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Hoff+$hi]
+
+	add	sp,sp,#640
+	sub	$Ktbl,$Ktbl,#640
+
+	teq	$inp,$len
+	bne	.Loop
+
+	add	sp,sp,#8*9		@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	sha512_block_data_order,.-sha512_block_data_order
+___
+
+{
+my @Sigma0=(28,34,39);
+my @Sigma1=(14,18,41);
+my @sigma0=(1, 8, 7);
+my @sigma1=(19,61,6);
+
+my $Ktbl="r3";
+my $cnt="r12";	# volatile register known as ip, intra-procedure-call scratch
+
+my @X=map("d$_",(0..15));
+my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
+
+sub NEON_00_15() {
+my $i=shift;
+my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31));	# temps
+
+$code.=<<___ if ($i<16 || $i&1);
+	vshr.u64	$t0,$e,#@Sigma1[0]	@ $i
+#if $i<16
+	vld1.64		{@X[$i%16]},[$inp]!	@ handles unaligned
+#endif
+	vshr.u64	$t1,$e,#@Sigma1[1]
+#if $i>0
+	 vadd.i64	$a,$Maj			@ h+=Maj from the past
+#endif
+	vshr.u64	$t2,$e,#@Sigma1[2]
+___
+$code.=<<___;
+	vld1.64		{$K},[$Ktbl,:64]!	@ K[i++]
+	vsli.64		$t0,$e,#`64-@Sigma1[0]`
+	vsli.64		$t1,$e,#`64-@Sigma1[1]`
+	vmov		$Ch,$e
+	vsli.64		$t2,$e,#`64-@Sigma1[2]`
+#if $i<16 && defined(__ARMEL__)
+	vrev64.8	@X[$i],@X[$i]
+#endif
+	veor		$t1,$t0
+	vbsl		$Ch,$f,$g		@ Ch(e,f,g)
+	vshr.u64	$t0,$a,#@Sigma0[0]
+	veor		$t2,$t1			@ Sigma1(e)
+	vadd.i64	$T1,$Ch,$h
+	vshr.u64	$t1,$a,#@Sigma0[1]
+	vsli.64		$t0,$a,#`64-@Sigma0[0]`
+	vadd.i64	$T1,$t2
+	vshr.u64	$t2,$a,#@Sigma0[2]
+	vadd.i64	$K,@X[$i%16]
+	vsli.64		$t1,$a,#`64-@Sigma0[1]`
+	veor		$Maj,$a,$b
+	vsli.64		$t2,$a,#`64-@Sigma0[2]`
+	veor		$h,$t0,$t1
+	vadd.i64	$T1,$K
+	vbsl		$Maj,$c,$b		@ Maj(a,b,c)
+	veor		$h,$t2			@ Sigma0(a)
+	vadd.i64	$d,$T1
+	vadd.i64	$Maj,$T1
+	@ vadd.i64	$h,$Maj
+___
+}
+
+sub NEON_16_79() {
+my $i=shift;
+
+if ($i&1)	{ &NEON_00_15($i,@_); return; }
+
+# 2x-vectorized, therefore runs every 2nd round
+my @X=map("q$_",(0..7));			# view @X as 128-bit vector
+my ($t0,$t1,$s0,$s1) = map("q$_",(12..15));	# temps
+my ($d0,$d1,$d2) = map("d$_",(24..26));		# temps from NEON_00_15
+my $e=@_[4];					# $e from NEON_00_15
+$i /= 2;
+$code.=<<___;
+	vshr.u64	$t0,@X[($i+7)%8],#@sigma1[0]
+	vshr.u64	$t1,@X[($i+7)%8],#@sigma1[1]
+	 vadd.i64	@_[0],d30			@ h+=Maj from the past
+	vshr.u64	$s1,@X[($i+7)%8],#@sigma1[2]
+	vsli.64		$t0,@X[($i+7)%8],#`64-@sigma1[0]`
+	vext.8		$s0,@X[$i%8],@X[($i+1)%8],#8	@ X[i+1]
+	vsli.64		$t1,@X[($i+7)%8],#`64-@sigma1[1]`
+	veor		$s1,$t0
+	vshr.u64	$t0,$s0,#@sigma0[0]
+	veor		$s1,$t1				@ sigma1(X[i+14])
+	vshr.u64	$t1,$s0,#@sigma0[1]
+	vadd.i64	@X[$i%8],$s1
+	vshr.u64	$s1,$s0,#@sigma0[2]
+	vsli.64		$t0,$s0,#`64-@sigma0[0]`
+	vsli.64		$t1,$s0,#`64-@sigma0[1]`
+	vext.8		$s0,@X[($i+4)%8],@X[($i+5)%8],#8	@ X[i+9]
+	veor		$s1,$t0
+	vshr.u64	$d0,$e,#@Sigma1[0]		@ from NEON_00_15
+	vadd.i64	@X[$i%8],$s0
+	vshr.u64	$d1,$e,#@Sigma1[1]		@ from NEON_00_15
+	veor		$s1,$t1				@ sigma0(X[i+1])
+	vshr.u64	$d2,$e,#@Sigma1[2]		@ from NEON_00_15
+	vadd.i64	@X[$i%8],$s1
+___
+	&NEON_00_15(2*$i,@_);
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.global	sha512_block_data_order_neon
+.type	sha512_block_data_order_neon,%function
+.align	4
+sha512_block_data_order_neon:
+.LNEON:
+	dmb				@ errata #451034 on early Cortex A8
+	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
+	VFP_ABI_PUSH
+	adrl	$Ktbl,K512
+	vldmia	$ctx,{$A-$H}		@ load context
+.Loop_neon:
+___
+for($i=0;$i<16;$i++)	{ &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	mov		$cnt,#4
+.L16_79_neon:
+	subs		$cnt,#1
+___
+for(;$i<32;$i++)	{ &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	bne		.L16_79_neon
+
+	 vadd.i64	$A,d30		@ h+=Maj from the past
+	vldmia		$ctx,{d24-d31}	@ load context to temp
+	vadd.i64	q8,q12		@ vectorized accumulate
+	vadd.i64	q9,q13
+	vadd.i64	q10,q14
+	vadd.i64	q11,q15
+	vstmia		$ctx,{$A-$H}	@ save context
+	teq		$inp,$len
+	sub		$Ktbl,#640	@ rewind K512
+	bne		.Loop_neon
+
+	VFP_ABI_POP
+	ret				@ bx lr
+.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
+#endif
+___
+}
+$code.=<<___;
+.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm	OPENSSL_armcap_P,4,4
+#endif
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx	lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+	next if (/^#!/);
+	last if (!s/^#/@/ and !/^$/);
+	print;
+}
+close SELF;
+
+print $code;
+close STDOUT; # enforce flush

+ 0 - 455
arch/arm/crypto/sha512-armv7-neon.S

@@ -1,455 +0,0 @@
-/* sha512-armv7-neon.S  -  ARM/NEON assembly implementation of SHA-512 transform
- *
- * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/linkage.h>
-
-
-.syntax unified
-.code   32
-.fpu neon
-
-.text
-
-/* structure of SHA512_CONTEXT */
-#define hd_a 0
-#define hd_b ((hd_a) + 8)
-#define hd_c ((hd_b) + 8)
-#define hd_d ((hd_c) + 8)
-#define hd_e ((hd_d) + 8)
-#define hd_f ((hd_e) + 8)
-#define hd_g ((hd_f) + 8)
-
-/* register macros */
-#define RK %r2
-
-#define RA d0
-#define RB d1
-#define RC d2
-#define RD d3
-#define RE d4
-#define RF d5
-#define RG d6
-#define RH d7
-
-#define RT0 d8
-#define RT1 d9
-#define RT2 d10
-#define RT3 d11
-#define RT4 d12
-#define RT5 d13
-#define RT6 d14
-#define RT7 d15
-
-#define RT01q q4
-#define RT23q q5
-#define RT45q q6
-#define RT67q q7
-
-#define RW0 d16
-#define RW1 d17
-#define RW2 d18
-#define RW3 d19
-#define RW4 d20
-#define RW5 d21
-#define RW6 d22
-#define RW7 d23
-#define RW8 d24
-#define RW9 d25
-#define RW10 d26
-#define RW11 d27
-#define RW12 d28
-#define RW13 d29
-#define RW14 d30
-#define RW15 d31
-
-#define RW01q q8
-#define RW23q q9
-#define RW45q q10
-#define RW67q q11
-#define RW89q q12
-#define RW1011q q13
-#define RW1213q q14
-#define RW1415q q15
-
-/***********************************************************************
- * ARM assembly implementation of sha512 transform
- ***********************************************************************/
-#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
-                     rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
-	/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
-	vshr.u64 RT2, re, #14; \
-	vshl.u64 RT3, re, #64 - 14; \
-	interleave_op(arg1); \
-	vshr.u64 RT4, re, #18; \
-	vshl.u64 RT5, re, #64 - 18; \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, re, #41; \
-	vshl.u64 RT5, re, #64 - 41; \
-	vadd.u64 RT0, RT0, rw0; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, re; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, rf, rg; \
-	\
-	vadd.u64 RT1, RT1, rh; \
-	vshr.u64 RT2, ra, #28; \
-	vshl.u64 RT3, ra, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, ra, #34; \
-	vshl.u64 RT5, ra, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* h = Sum0 (a) + Maj (a, b, c); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, ra, #39; \
-	vshl.u64 RT5, ra, #64 - 39; \
-	veor.64 RT0, ra, rb; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rc, rb; \
-	vadd.u64 rd, rd, RT1; /* d+=t1; */ \
-	veor.64 rh, RT2, RT3; \
-	\
-	/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
-	vshr.u64 RT2, rd, #14; \
-	vshl.u64 RT3, rd, #64 - 14; \
-	vadd.u64 rh, rh, RT0; \
-	vshr.u64 RT4, rd, #18; \
-	vshl.u64 RT5, rd, #64 - 18; \
-	vadd.u64 rh, rh, RT1; /* h+=t1; */ \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rd, #41; \
-	vshl.u64 RT5, rd, #64 - 41; \
-	vadd.u64 RT0, RT0, rw1; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, rd; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, re, rf; \
-	\
-	vadd.u64 RT1, RT1, rg; \
-	vshr.u64 RT2, rh, #28; \
-	vshl.u64 RT3, rh, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, rh, #34; \
-	vshl.u64 RT5, rh, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* g = Sum0 (h) + Maj (h, a, b); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rh, #39; \
-	vshl.u64 RT5, rh, #64 - 39; \
-	veor.64 RT0, rh, ra; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rb, ra; \
-	vadd.u64 rc, rc, RT1; /* c+=t1; */ \
-	veor.64 rg, RT2, RT3; \
-	\
-	/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
-	/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
-	\
-	/**** S0(w[1:2]) */ \
-	\
-	/* w[0:1] += w[9:10] */ \
-	/* RT23q = rw1:rw2 */ \
-	vext.u64 RT23q, rw01q, rw23q, #1; \
-	vadd.u64 rw0, rw9; \
-	vadd.u64 rg, rg, RT0; \
-	vadd.u64 rw1, rw10;\
-	vadd.u64 rg, rg, RT1; /* g+=t1; */ \
-	\
-	vshr.u64 RT45q, RT23q, #1; \
-	vshl.u64 RT67q, RT23q, #64 - 1; \
-	vshr.u64 RT01q, RT23q, #8; \
-	veor.u64 RT45q, RT45q, RT67q; \
-	vshl.u64 RT67q, RT23q, #64 - 8; \
-	veor.u64 RT45q, RT45q, RT01q; \
-	vshr.u64 RT01q, RT23q, #7; \
-	veor.u64 RT45q, RT45q, RT67q; \
-	\
-	/**** S1(w[14:15]) */ \
-	vshr.u64 RT23q, rw1415q, #6; \
-	veor.u64 RT01q, RT01q, RT45q; \
-	vshr.u64 RT45q, rw1415q, #19; \
-	vshl.u64 RT67q, rw1415q, #64 - 19; \
-	veor.u64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT45q, rw1415q, #61; \
-	veor.u64 RT23q, RT23q, RT67q; \
-	vshl.u64 RT67q, rw1415q, #64 - 61; \
-	veor.u64 RT23q, RT23q, RT45q; \
-	vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
-	veor.u64 RT01q, RT23q, RT67q;
-#define vadd_RT01q(rw01q) \
-	/* w[0:1] += S(w[14:15]) */ \
-	vadd.u64 rw01q, RT01q;
-
-#define dummy(_) /*_*/
-
-#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
-	              interleave_op1, arg1, interleave_op2, arg2) \
-	/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
-	vshr.u64 RT2, re, #14; \
-	vshl.u64 RT3, re, #64 - 14; \
-	interleave_op1(arg1); \
-	vshr.u64 RT4, re, #18; \
-	vshl.u64 RT5, re, #64 - 18; \
-	interleave_op2(arg2); \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, re, #41; \
-	vshl.u64 RT5, re, #64 - 41; \
-	vadd.u64 RT0, RT0, rw0; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, re; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, rf, rg; \
-	\
-	vadd.u64 RT1, RT1, rh; \
-	vshr.u64 RT2, ra, #28; \
-	vshl.u64 RT3, ra, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, ra, #34; \
-	vshl.u64 RT5, ra, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* h = Sum0 (a) + Maj (a, b, c); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, ra, #39; \
-	vshl.u64 RT5, ra, #64 - 39; \
-	veor.64 RT0, ra, rb; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rc, rb; \
-	vadd.u64 rd, rd, RT1; /* d+=t1; */ \
-	veor.64 rh, RT2, RT3; \
-	\
-	/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
-	vshr.u64 RT2, rd, #14; \
-	vshl.u64 RT3, rd, #64 - 14; \
-	vadd.u64 rh, rh, RT0; \
-	vshr.u64 RT4, rd, #18; \
-	vshl.u64 RT5, rd, #64 - 18; \
-	vadd.u64 rh, rh, RT1; /* h+=t1; */ \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rd, #41; \
-	vshl.u64 RT5, rd, #64 - 41; \
-	vadd.u64 RT0, RT0, rw1; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, rd; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, re, rf; \
-	\
-	vadd.u64 RT1, RT1, rg; \
-	vshr.u64 RT2, rh, #28; \
-	vshl.u64 RT3, rh, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, rh, #34; \
-	vshl.u64 RT5, rh, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* g = Sum0 (h) + Maj (h, a, b); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rh, #39; \
-	vshl.u64 RT5, rh, #64 - 39; \
-	veor.64 RT0, rh, ra; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rb, ra; \
-	vadd.u64 rc, rc, RT1; /* c+=t1; */ \
-	veor.64 rg, RT2, RT3;
-#define vadd_rg_RT0(rg) \
-	vadd.u64 rg, rg, RT0;
-#define vadd_rg_RT1(rg) \
-	vadd.u64 rg, rg, RT1; /* g+=t1; */
-
-.align 3
-ENTRY(sha512_transform_neon)
-	/* Input:
-	 *	%r0: SHA512_CONTEXT
-	 *	%r1: data
-	 *	%r2: u64 k[] constants
-	 *	%r3: nblks
-	 */
-	push {%lr};
-
-	mov %lr, #0;
-
-	/* Load context to d0-d7 */
-	vld1.64 {RA-RD}, [%r0]!;
-	vld1.64 {RE-RH}, [%r0];
-	sub %r0, #(4*8);
-
-	/* Load input to w[16], d16-d31 */
-	/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
-	vld1.64 {RW0-RW3}, [%r1]!;
-	vld1.64 {RW4-RW7}, [%r1]!;
-	vld1.64 {RW8-RW11}, [%r1]!;
-	vld1.64 {RW12-RW15}, [%r1]!;
-#ifdef __ARMEL__
-	/* byteswap */
-	vrev64.8 RW01q, RW01q;
-	vrev64.8 RW23q, RW23q;
-	vrev64.8 RW45q, RW45q;
-	vrev64.8 RW67q, RW67q;
-	vrev64.8 RW89q, RW89q;
-	vrev64.8 RW1011q, RW1011q;
-	vrev64.8 RW1213q, RW1213q;
-	vrev64.8 RW1415q, RW1415q;
-#endif
-
-	/* EABI says that d8-d15 must be preserved by callee. */
-	/*vpush {RT0-RT7};*/
-
-.Loop:
-	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
-		     RW23q, RW1415q, RW9, RW10, dummy, _);
-	b .Lenter_rounds;
-
-.Loop_rounds:
-	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
-		     RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
-.Lenter_rounds:
-	rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
-		     RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
-	rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
-		     RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
-	rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
-		     RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
-	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
-		     RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
-	rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
-		     RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
-	add %lr, #16;
-	rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
-		     RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
-	cmp %lr, #64;
-	rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
-		     RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
-	bne .Loop_rounds;
-
-	subs %r3, #1;
-
-	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
-		      vadd_RT01q, RW1415q, dummy, _);
-	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
-		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-	beq .Lhandle_tail;
-	vld1.64 {RW0-RW3}, [%r1]!;
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-#ifdef __ARMEL__
-	vrev64.8 RW01q, RW01q;
-	vrev64.8 RW23q, RW23q;
-#endif
-	vld1.64 {RW4-RW7}, [%r1]!;
-	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
-		      vadd_rg_RT0, RA, vadd_rg_RT1, RA);
-	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
-		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-#ifdef __ARMEL__
-	vrev64.8 RW45q, RW45q;
-	vrev64.8 RW67q, RW67q;
-#endif
-	vld1.64 {RW8-RW11}, [%r1]!;
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-#ifdef __ARMEL__
-	vrev64.8 RW89q, RW89q;
-	vrev64.8 RW1011q, RW1011q;
-#endif
-	vld1.64 {RW12-RW15}, [%r1]!;
-	vadd_rg_RT0(RA);
-	vadd_rg_RT1(RA);
-
-	/* Load context */
-	vld1.64 {RT0-RT3}, [%r0]!;
-	vld1.64 {RT4-RT7}, [%r0];
-	sub %r0, #(4*8);
-
-#ifdef __ARMEL__
-	vrev64.8 RW1213q, RW1213q;
-	vrev64.8 RW1415q, RW1415q;
-#endif
-
-	vadd.u64 RA, RT0;
-	vadd.u64 RB, RT1;
-	vadd.u64 RC, RT2;
-	vadd.u64 RD, RT3;
-	vadd.u64 RE, RT4;
-	vadd.u64 RF, RT5;
-	vadd.u64 RG, RT6;
-	vadd.u64 RH, RT7;
-
-	/* Store the first half of context */
-	vst1.64 {RA-RD}, [%r0]!;
-	sub RK, $(8*80);
-	vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
-	mov %lr, #0;
-	sub %r0, #(4*8);
-
-	b .Loop;
-
-.Lhandle_tail:
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
-		      vadd_rg_RT0, RA, vadd_rg_RT1, RA);
-	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
-		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-
-	/* Load context to d16-d23 */
-	vld1.64 {RW0-RW3}, [%r0]!;
-	vadd_rg_RT0(RA);
-	vld1.64 {RW4-RW7}, [%r0];
-	vadd_rg_RT1(RA);
-	sub %r0, #(4*8);
-
-	vadd.u64 RA, RW0;
-	vadd.u64 RB, RW1;
-	vadd.u64 RC, RW2;
-	vadd.u64 RD, RW3;
-	vadd.u64 RE, RW4;
-	vadd.u64 RF, RW5;
-	vadd.u64 RG, RW6;
-	vadd.u64 RH, RW7;
-
-	/* Store the first half of context */
-	vst1.64 {RA-RD}, [%r0]!;
-
-	/* Clear used registers */
-	/* d16-d31 */
-	veor.u64 RW01q, RW01q;
-	veor.u64 RW23q, RW23q;
-	veor.u64 RW45q, RW45q;
-	veor.u64 RW67q, RW67q;
-	vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
-	veor.u64 RW89q, RW89q;
-	veor.u64 RW1011q, RW1011q;
-	veor.u64 RW1213q, RW1213q;
-	veor.u64 RW1415q, RW1415q;
-	/* d8-d15 */
-	/*vpop {RT0-RT7};*/
-	/* d0-d7 (q0-q3) */
-	veor.u64 %q0, %q0;
-	veor.u64 %q1, %q1;
-	veor.u64 %q2, %q2;
-	veor.u64 %q3, %q3;
-
-	pop {%pc};
-ENDPROC(sha512_transform_neon)

+ 1861 - 0
arch/arm/crypto/sha512-core.S_shipped

@@ -0,0 +1,1861 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA512 block procedure for ARMv4. September 2007.
+
+@ This code is ~4.5 (four and a half) times faster than code generated
+@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+@ Xscale PXA250 core].
+@
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
+@ Cortex A8 core and ~40 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 7%
+@ improvement on Coxtex A8 core and ~38 cycles per byte.
+
+@ March 2011.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process
+@ one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+@ August 2012.
+@
+@ Improve NEON performance by 12% on Snapdragon S4. In absolute
+@ terms it's 22.6 cycles per byte, which is disappointing result.
+@ Technical writers asserted that 3-way S4 pipeline can sustain
+@ multiple NEON instructions per cycle, but dual NEON issue could
+@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+@ for further details. On side note Cortex-A15 processes one byte in
+@ 16 cycles.
+
+@ Byte order [in]dependence. =========================================
+@
+@ Originally caller was expected to maintain specific *dword* order in
+@ h[0-7], namely with most significant dword at *lower* address, which
+@ was reflected in below two parameters as 0 and 4. Now caller is
+@ expected to maintain native byte order for whole 64-bit values.
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
+# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code	32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
+.type	K512,%object
+.align	5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size	K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha512_block_data_order
+.skip	32-4
+#else
+.skip	32
+#endif
+
+.global	sha512_block_data_order
+.type	sha512_block_data_order,%function
+sha512_block_data_order:
+#if __ARM_ARCH__<7
+	sub	r3,pc,#8		@ sha512_block_data_order
+#else
+	adr	r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#1
+	bne	.LNEON
+#endif
+	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
+	stmdb	sp!,{r4-r12,lr}
+	sub	r14,r3,#672		@ K512
+	sub	sp,sp,#9*8
+
+	ldr	r7,[r0,#32+LO]
+	ldr	r8,[r0,#32+HI]
+	ldr	r9, [r0,#48+LO]
+	ldr	r10, [r0,#48+HI]
+	ldr	r11, [r0,#56+LO]
+	ldr	r12, [r0,#56+HI]
+.Loop:
+	str	r9, [sp,#48+0]
+	str	r10, [sp,#48+4]
+	str	r11, [sp,#56+0]
+	str	r12, [sp,#56+4]
+	ldr	r5,[r0,#0+LO]
+	ldr	r6,[r0,#0+HI]
+	ldr	r3,[r0,#8+LO]
+	ldr	r4,[r0,#8+HI]
+	ldr	r9, [r0,#16+LO]
+	ldr	r10, [r0,#16+HI]
+	ldr	r11, [r0,#24+LO]
+	ldr	r12, [r0,#24+HI]
+	str	r3,[sp,#8+0]
+	str	r4,[sp,#8+4]
+	str	r9, [sp,#16+0]
+	str	r10, [sp,#16+4]
+	str	r11, [sp,#24+0]
+	str	r12, [sp,#24+4]
+	ldr	r3,[r0,#40+LO]
+	ldr	r4,[r0,#40+HI]
+	str	r3,[sp,#40+0]
+	str	r4,[sp,#40+4]
+
+.L00_15:
+#if __ARM_ARCH__<7
+	ldrb	r3,[r1,#7]
+	ldrb	r9, [r1,#6]
+	ldrb	r10, [r1,#5]
+	ldrb	r11, [r1,#4]
+	ldrb	r4,[r1,#3]
+	ldrb	r12, [r1,#2]
+	orr	r3,r3,r9,lsl#8
+	ldrb	r9, [r1,#1]
+	orr	r3,r3,r10,lsl#16
+	ldrb	r10, [r1],#8
+	orr	r3,r3,r11,lsl#24
+	orr	r4,r4,r12,lsl#8
+	orr	r4,r4,r9,lsl#16
+	orr	r4,r4,r10,lsl#24
+#else
+	ldr	r3,[r1,#4]
+	ldr	r4,[r1],#8
+#ifdef __ARMEL__
+	rev	r3,r3
+	rev	r4,r4
+#endif
+#endif
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	r9,r7,lsr#14
+	str	r3,[sp,#64+0]
+	mov	r10,r8,lsr#14
+	str	r4,[sp,#64+4]
+	eor	r9,r9,r8,lsl#18
+	ldr	r11,[sp,#56+0]	@ h.lo
+	eor	r10,r10,r7,lsl#18
+	ldr	r12,[sp,#56+4]	@ h.hi
+	eor	r9,r9,r7,lsr#18
+	eor	r10,r10,r8,lsr#18
+	eor	r9,r9,r8,lsl#14
+	eor	r10,r10,r7,lsl#14
+	eor	r9,r9,r8,lsr#9
+	eor	r10,r10,r7,lsr#9
+	eor	r9,r9,r7,lsl#23
+	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
+	adds	r3,r3,r9
+	ldr	r9,[sp,#40+0]	@ f.lo
+	adc	r4,r4,r10		@ T += Sigma1(e)
+	ldr	r10,[sp,#40+4]	@ f.hi
+	adds	r3,r3,r11
+	ldr	r11,[sp,#48+0]	@ g.lo
+	adc	r4,r4,r12		@ T += h
+	ldr	r12,[sp,#48+4]	@ g.hi
+
+	eor	r9,r9,r11
+	str	r7,[sp,#32+0]
+	eor	r10,r10,r12
+	str	r8,[sp,#32+4]
+	and	r9,r9,r7
+	str	r5,[sp,#0+0]
+	and	r10,r10,r8
+	str	r6,[sp,#0+4]
+	eor	r9,r9,r11
+	ldr	r11,[r14,#LO]	@ K[i].lo
+	eor	r10,r10,r12		@ Ch(e,f,g)
+	ldr	r12,[r14,#HI]	@ K[i].hi
+
+	adds	r3,r3,r9
+	ldr	r7,[sp,#24+0]	@ d.lo
+	adc	r4,r4,r10		@ T += Ch(e,f,g)
+	ldr	r8,[sp,#24+4]	@ d.hi
+	adds	r3,r3,r11
+	and	r9,r11,#0xff
+	adc	r4,r4,r12		@ T += K[i]
+	adds	r7,r7,r3
+	ldr	r11,[sp,#8+0]	@ b.lo
+	adc	r8,r8,r4		@ d += T
+	teq	r9,#148
+
+	ldr	r12,[sp,#16+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	orreq	r14,r14,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	r9,r5,lsr#28
+	mov	r10,r6,lsr#28
+	eor	r9,r9,r6,lsl#4
+	eor	r10,r10,r5,lsl#4
+	eor	r9,r9,r6,lsr#2
+	eor	r10,r10,r5,lsr#2
+	eor	r9,r9,r5,lsl#30
+	eor	r10,r10,r6,lsl#30
+	eor	r9,r9,r6,lsr#7
+	eor	r10,r10,r5,lsr#7
+	eor	r9,r9,r5,lsl#25
+	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
+	adds	r3,r3,r9
+	and	r9,r5,r11
+	adc	r4,r4,r10		@ T += Sigma0(a)
+
+	ldr	r10,[sp,#8+4]	@ b.hi
+	orr	r5,r5,r11
+	ldr	r11,[sp,#16+4]	@ c.hi
+	and	r5,r5,r12
+	and	r12,r6,r10
+	orr	r6,r6,r10
+	orr	r5,r5,r9		@ Maj(a,b,c).lo
+	and	r6,r6,r11
+	adds	r5,r5,r3
+	orr	r6,r6,r12		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	r6,r6,r4		@ h += T
+	tst	r14,#1
+	add	r14,r14,#8
+	tst	r14,#1
+	beq	.L00_15
+	ldr	r9,[sp,#184+0]
+	ldr	r10,[sp,#184+4]
+	bic	r14,r14,#1
+.L16_79:
+	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
+	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
+	mov	r3,r9,lsr#1
+	ldr	r11,[sp,#80+0]
+	mov	r4,r10,lsr#1
+	ldr	r12,[sp,#80+4]
+	eor	r3,r3,r10,lsl#31
+	eor	r4,r4,r9,lsl#31
+	eor	r3,r3,r9,lsr#8
+	eor	r4,r4,r10,lsr#8
+	eor	r3,r3,r10,lsl#24
+	eor	r4,r4,r9,lsl#24
+	eor	r3,r3,r9,lsr#7
+	eor	r4,r4,r10,lsr#7
+	eor	r3,r3,r10,lsl#25
+
+	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+	mov	r9,r11,lsr#19
+	mov	r10,r12,lsr#19
+	eor	r9,r9,r12,lsl#13
+	eor	r10,r10,r11,lsl#13
+	eor	r9,r9,r12,lsr#29
+	eor	r10,r10,r11,lsr#29
+	eor	r9,r9,r11,lsl#3
+	eor	r10,r10,r12,lsl#3
+	eor	r9,r9,r11,lsr#6
+	eor	r10,r10,r12,lsr#6
+	ldr	r11,[sp,#120+0]
+	eor	r9,r9,r12,lsl#26
+
+	ldr	r12,[sp,#120+4]
+	adds	r3,r3,r9
+	ldr	r9,[sp,#192+0]
+	adc	r4,r4,r10
+
+	ldr	r10,[sp,#192+4]
+	adds	r3,r3,r11
+	adc	r4,r4,r12
+	adds	r3,r3,r9
+	adc	r4,r4,r10
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	r9,r7,lsr#14
+	str	r3,[sp,#64+0]
+	mov	r10,r8,lsr#14
+	str	r4,[sp,#64+4]
+	eor	r9,r9,r8,lsl#18
+	ldr	r11,[sp,#56+0]	@ h.lo
+	eor	r10,r10,r7,lsl#18
+	ldr	r12,[sp,#56+4]	@ h.hi
+	eor	r9,r9,r7,lsr#18
+	eor	r10,r10,r8,lsr#18
+	eor	r9,r9,r8,lsl#14
+	eor	r10,r10,r7,lsl#14
+	eor	r9,r9,r8,lsr#9
+	eor	r10,r10,r7,lsr#9
+	eor	r9,r9,r7,lsl#23
+	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
+	adds	r3,r3,r9
+	ldr	r9,[sp,#40+0]	@ f.lo
+	adc	r4,r4,r10		@ T += Sigma1(e)
+	ldr	r10,[sp,#40+4]	@ f.hi
+	adds	r3,r3,r11
+	ldr	r11,[sp,#48+0]	@ g.lo
+	adc	r4,r4,r12		@ T += h
+	ldr	r12,[sp,#48+4]	@ g.hi
+
+	eor	r9,r9,r11
+	str	r7,[sp,#32+0]
+	eor	r10,r10,r12
+	str	r8,[sp,#32+4]
+	and	r9,r9,r7
+	str	r5,[sp,#0+0]
+	and	r10,r10,r8
+	str	r6,[sp,#0+4]
+	eor	r9,r9,r11
+	ldr	r11,[r14,#LO]	@ K[i].lo
+	eor	r10,r10,r12		@ Ch(e,f,g)
+	ldr	r12,[r14,#HI]	@ K[i].hi
+
+	adds	r3,r3,r9
+	ldr	r7,[sp,#24+0]	@ d.lo
+	adc	r4,r4,r10		@ T += Ch(e,f,g)
+	ldr	r8,[sp,#24+4]	@ d.hi
+	adds	r3,r3,r11
+	and	r9,r11,#0xff
+	adc	r4,r4,r12		@ T += K[i]
+	adds	r7,r7,r3
+	ldr	r11,[sp,#8+0]	@ b.lo
+	adc	r8,r8,r4		@ d += T
+	teq	r9,#23
+
+	ldr	r12,[sp,#16+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	orreq	r14,r14,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	r9,r5,lsr#28
+	mov	r10,r6,lsr#28
+	eor	r9,r9,r6,lsl#4
+	eor	r10,r10,r5,lsl#4
+	eor	r9,r9,r6,lsr#2
+	eor	r10,r10,r5,lsr#2
+	eor	r9,r9,r5,lsl#30
+	eor	r10,r10,r6,lsl#30
+	eor	r9,r9,r6,lsr#7
+	eor	r10,r10,r5,lsr#7
+	eor	r9,r9,r5,lsl#25
+	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
+	adds	r3,r3,r9
+	and	r9,r5,r11
+	adc	r4,r4,r10		@ T += Sigma0(a)
+
+	ldr	r10,[sp,#8+4]	@ b.hi
+	orr	r5,r5,r11
+	ldr	r11,[sp,#16+4]	@ c.hi
+	and	r5,r5,r12
+	and	r12,r6,r10
+	orr	r6,r6,r10
+	orr	r5,r5,r9		@ Maj(a,b,c).lo
+	and	r6,r6,r11
+	adds	r5,r5,r3
+	orr	r6,r6,r12		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	r6,r6,r4		@ h += T
+	tst	r14,#1
+	add	r14,r14,#8
+#if __ARM_ARCH__>=7
+	ittt	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	ldreq	r9,[sp,#184+0]
+	ldreq	r10,[sp,#184+4]
+	beq	.L16_79
+	bic	r14,r14,#1
+
+	ldr	r3,[sp,#8+0]
+	ldr	r4,[sp,#8+4]
+	ldr	r9, [r0,#0+LO]
+	ldr	r10, [r0,#0+HI]
+	ldr	r11, [r0,#8+LO]
+	ldr	r12, [r0,#8+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#0+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#0+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#8+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#8+HI]
+
+	ldr	r5,[sp,#16+0]
+	ldr	r6,[sp,#16+4]
+	ldr	r3,[sp,#24+0]
+	ldr	r4,[sp,#24+4]
+	ldr	r9, [r0,#16+LO]
+	ldr	r10, [r0,#16+HI]
+	ldr	r11, [r0,#24+LO]
+	ldr	r12, [r0,#24+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#16+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#16+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#24+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#24+HI]
+
+	ldr	r3,[sp,#40+0]
+	ldr	r4,[sp,#40+4]
+	ldr	r9, [r0,#32+LO]
+	ldr	r10, [r0,#32+HI]
+	ldr	r11, [r0,#40+LO]
+	ldr	r12, [r0,#40+HI]
+	adds	r7,r7,r9
+	str	r7,[r0,#32+LO]
+	adc	r8,r8,r10
+	str	r8,[r0,#32+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#40+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#40+HI]
+
+	ldr	r5,[sp,#48+0]
+	ldr	r6,[sp,#48+4]
+	ldr	r3,[sp,#56+0]
+	ldr	r4,[sp,#56+4]
+	ldr	r9, [r0,#48+LO]
+	ldr	r10, [r0,#48+HI]
+	ldr	r11, [r0,#56+LO]
+	ldr	r12, [r0,#56+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#48+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#48+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#56+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#56+HI]
+
+	add	sp,sp,#640
+	sub	r14,r14,#640
+
+	teq	r1,r2
+	bne	.Loop
+
+	add	sp,sp,#8*9		@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	sha512_block_data_order,.-sha512_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.global	sha512_block_data_order_neon
+.type	sha512_block_data_order_neon,%function
+.align	4
+sha512_block_data_order_neon:
+.LNEON:
+	dmb				@ errata #451034 on early Cortex A8
+	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
+	VFP_ABI_PUSH
+	adrl	r3,K512
+	vldmia	r0,{d16-d23}		@ load context
+.Loop_neon:
+	vshr.u64	d24,d20,#14	@ 0
+#if 0<16
+	vld1.64		{d0},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d20,#18
+#if 0>0
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d20,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 0<16 && defined(__ARMEL__)
+	vrev64.8	d0,d0
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d0
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 1
+#if 1<16
+	vld1.64		{d1},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 1>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 1<16 && defined(__ARMEL__)
+	vrev64.8	d1,d1
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d1
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	d24,d18,#14	@ 2
+#if 2<16
+	vld1.64		{d2},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d18,#18
+#if 2>0
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d18,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 2<16 && defined(__ARMEL__)
+	vrev64.8	d2,d2
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d2
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 3
+#if 3<16
+	vld1.64		{d3},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 3>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 3<16 && defined(__ARMEL__)
+	vrev64.8	d3,d3
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d3
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	d24,d16,#14	@ 4
+#if 4<16
+	vld1.64		{d4},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d16,#18
+#if 4>0
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d16,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 4<16 && defined(__ARMEL__)
+	vrev64.8	d4,d4
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d4
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 5
+#if 5<16
+	vld1.64		{d5},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 5>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 5<16 && defined(__ARMEL__)
+	vrev64.8	d5,d5
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d5
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	d24,d22,#14	@ 6
+#if 6<16
+	vld1.64		{d6},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d22,#18
+#if 6>0
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d22,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 6<16 && defined(__ARMEL__)
+	vrev64.8	d6,d6
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d6
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 7
+#if 7<16
+	vld1.64		{d7},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 7>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 7<16 && defined(__ARMEL__)
+	vrev64.8	d7,d7
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d7
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	vshr.u64	d24,d20,#14	@ 8
+#if 8<16
+	vld1.64		{d8},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d20,#18
+#if 8>0
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d20,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 8<16 && defined(__ARMEL__)
+	vrev64.8	d8,d8
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d8
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 9
+#if 9<16
+	vld1.64		{d9},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 9>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 9<16 && defined(__ARMEL__)
+	vrev64.8	d9,d9
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d9
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	d24,d18,#14	@ 10
+#if 10<16
+	vld1.64		{d10},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d18,#18
+#if 10>0
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d18,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 10<16 && defined(__ARMEL__)
+	vrev64.8	d10,d10
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d10
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 11
+#if 11<16
+	vld1.64		{d11},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 11>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 11<16 && defined(__ARMEL__)
+	vrev64.8	d11,d11
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d11
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	d24,d16,#14	@ 12
+#if 12<16
+	vld1.64		{d12},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d16,#18
+#if 12>0
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d16,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 12<16 && defined(__ARMEL__)
+	vrev64.8	d12,d12
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d12
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 13
+#if 13<16
+	vld1.64		{d13},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 13>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 13<16 && defined(__ARMEL__)
+	vrev64.8	d13,d13
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d13
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	d24,d22,#14	@ 14
+#if 14<16
+	vld1.64		{d14},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d22,#18
+#if 14>0
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d22,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 14<16 && defined(__ARMEL__)
+	vrev64.8	d14,d14
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d14
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 15
+#if 15<16
+	vld1.64		{d15},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 15>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 15<16 && defined(__ARMEL__)
+	vrev64.8	d15,d15
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d15
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	mov		r12,#4
+.L16_79_neon:
+	subs		r12,#1
+	vshr.u64	q12,q7,#19
+	vshr.u64	q13,q7,#61
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+	vshr.u64	q15,q7,#6
+	vsli.64		q12,q7,#45
+	vext.8		q14,q0,q1,#8	@ X[i+1]
+	vsli.64		q13,q7,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q0,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q4,q5,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d20,#14		@ from NEON_00_15
+	vadd.i64	q0,q14
+	vshr.u64	d25,d20,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d20,#41		@ from NEON_00_15
+	vadd.i64	q0,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 16<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d0
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 17
+#if 17<16
+	vld1.64		{d1},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 17>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 17<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d1
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	q12,q0,#19
+	vshr.u64	q13,q0,#61
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+	vshr.u64	q15,q0,#6
+	vsli.64		q12,q0,#45
+	vext.8		q14,q1,q2,#8	@ X[i+1]
+	vsli.64		q13,q0,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q1,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q5,q6,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d18,#14		@ from NEON_00_15
+	vadd.i64	q1,q14
+	vshr.u64	d25,d18,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d18,#41		@ from NEON_00_15
+	vadd.i64	q1,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 18<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d2
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 19
+#if 19<16
+	vld1.64		{d3},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 19>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 19<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d3
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	q12,q1,#19
+	vshr.u64	q13,q1,#61
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+	vshr.u64	q15,q1,#6
+	vsli.64		q12,q1,#45
+	vext.8		q14,q2,q3,#8	@ X[i+1]
+	vsli.64		q13,q1,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q2,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q6,q7,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d16,#14		@ from NEON_00_15
+	vadd.i64	q2,q14
+	vshr.u64	d25,d16,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d16,#41		@ from NEON_00_15
+	vadd.i64	q2,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 20<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d4
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 21
+#if 21<16
+	vld1.64		{d5},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 21>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 21<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d5
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	q12,q2,#19
+	vshr.u64	q13,q2,#61
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+	vshr.u64	q15,q2,#6
+	vsli.64		q12,q2,#45
+	vext.8		q14,q3,q4,#8	@ X[i+1]
+	vsli.64		q13,q2,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q3,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q7,q0,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d22,#14		@ from NEON_00_15
+	vadd.i64	q3,q14
+	vshr.u64	d25,d22,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d22,#41		@ from NEON_00_15
+	vadd.i64	q3,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 22<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d6
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 23
+#if 23<16
+	vld1.64		{d7},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 23>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 23<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d7
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	vshr.u64	q12,q3,#19
+	vshr.u64	q13,q3,#61
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+	vshr.u64	q15,q3,#6
+	vsli.64		q12,q3,#45
+	vext.8		q14,q4,q5,#8	@ X[i+1]
+	vsli.64		q13,q3,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q4,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q0,q1,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d20,#14		@ from NEON_00_15
+	vadd.i64	q4,q14
+	vshr.u64	d25,d20,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d20,#41		@ from NEON_00_15
+	vadd.i64	q4,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 24<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d8
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 25
+#if 25<16
+	vld1.64		{d9},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 25>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 25<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d9
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	q12,q4,#19
+	vshr.u64	q13,q4,#61
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+	vshr.u64	q15,q4,#6
+	vsli.64		q12,q4,#45
+	vext.8		q14,q5,q6,#8	@ X[i+1]
+	vsli.64		q13,q4,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q5,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q1,q2,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d18,#14		@ from NEON_00_15
+	vadd.i64	q5,q14
+	vshr.u64	d25,d18,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d18,#41		@ from NEON_00_15
+	vadd.i64	q5,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 26<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d10
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 27
+#if 27<16
+	vld1.64		{d11},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 27>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 27<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d11
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	q12,q5,#19
+	vshr.u64	q13,q5,#61
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+	vshr.u64	q15,q5,#6
+	vsli.64		q12,q5,#45
+	vext.8		q14,q6,q7,#8	@ X[i+1]
+	vsli.64		q13,q5,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q6,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q2,q3,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d16,#14		@ from NEON_00_15
+	vadd.i64	q6,q14
+	vshr.u64	d25,d16,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d16,#41		@ from NEON_00_15
+	vadd.i64	q6,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 28<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d12
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 29
+#if 29<16
+	vld1.64		{d13},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 29>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 29<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d13
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	q12,q6,#19
+	vshr.u64	q13,q6,#61
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+	vshr.u64	q15,q6,#6
+	vsli.64		q12,q6,#45
+	vext.8		q14,q7,q0,#8	@ X[i+1]
+	vsli.64		q13,q6,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q7,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q3,q4,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d22,#14		@ from NEON_00_15
+	vadd.i64	q7,q14
+	vshr.u64	d25,d22,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d22,#41		@ from NEON_00_15
+	vadd.i64	q7,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 30<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d14
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 31
+#if 31<16
+	vld1.64		{d15},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 31>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 31<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d15
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	bne		.L16_79_neon
+
+	 vadd.i64	d16,d30		@ h+=Maj from the past
+	vldmia		r0,{d24-d31}	@ load context to temp
+	vadd.i64	q8,q12		@ vectorized accumulate
+	vadd.i64	q9,q13
+	vadd.i64	q10,q14
+	vadd.i64	q11,q15
+	vstmia		r0,{d16-d23}	@ save context
+	teq		r1,r2
+	sub		r3,#640	@ rewind K512
+	bne		.Loop_neon
+
+	VFP_ABI_POP
+	bx	lr				@ .word	0xe12fff1e
+.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
+#endif
+.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm	OPENSSL_armcap_P,4,4
+#endif
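
A note on the scalar code above: ARMv4 has no 64-bit registers, so every 64-bit rotate in the SHA-512 round function is decomposed into shift/XOR pairs on the lo/hi word halves, exactly as the @ LO / @ HI comment lines spell out. A minimal C sketch of that decomposition for Sigma1 (illustration only, not part of the generated file):

	#include <stdint.h>

	struct u64_pair { uint32_t lo, hi; };

	/* Sigma1(x) = ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41), computed on
	 * 32-bit halves: each 64-bit rotate becomes one shift per half,
	 * with the bits crossing the word boundary OR-ed in from the
	 * other half -- compare the "lo>>14^hi<<18 ..." comments. */
	static struct u64_pair sigma1(struct u64_pair x)
	{
		struct u64_pair r;

		r.lo = (x.lo >> 14 | x.hi << 18) ^
		       (x.lo >> 18 | x.hi << 14) ^
		       (x.hi >>  9 | x.lo << 23);
		r.hi = (x.hi >> 14 | x.lo << 18) ^
		       (x.hi >> 18 | x.lo << 14) ^
		       (x.lo >>  9 | x.hi << 23);
		return r;
	}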

+ 121 - 0
arch/arm/crypto/sha512-glue.c

@@ -0,0 +1,121 @@
+/*
+ * sha512-glue.c - accelerated SHA-384/512 for ARM
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+#include "sha512.h"
+
+MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha384-arm");
+MODULE_ALIAS_CRYPTO("sha512-arm");
+
+asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
+
+int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+		      unsigned int len)
+{
+	return sha512_base_do_update(desc, data, len,
+		(sha512_block_fn *)sha512_block_data_order);
+}
+
+int sha512_arm_final(struct shash_desc *desc, u8 *out)
+{
+	sha512_base_do_finalize(desc,
+		(sha512_block_fn *)sha512_block_data_order);
+	return sha512_base_finish(desc, out);
+}
+
+int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+		     unsigned int len, u8 *out)
+{
+	sha512_base_do_update(desc, data, len,
+		(sha512_block_fn *)sha512_block_data_order);
+	return sha512_arm_final(desc, out);
+}
+
+static struct shash_alg sha512_arm_algs[] = { {
+	.init			= sha384_base_init,
+	.update			= sha512_arm_update,
+	.final			= sha512_arm_final,
+	.finup			= sha512_arm_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha384",
+		.cra_driver_name	= "sha384-arm",
+		.cra_priority		= 250,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA512_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+},  {
+	.init			= sha512_base_init,
+	.update			= sha512_arm_update,
+	.final			= sha512_arm_final,
+	.finup			= sha512_arm_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha512",
+		.cra_driver_name	= "sha512-arm",
+		.cra_priority		= 250,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA512_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
+
+static int __init sha512_arm_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shashes(sha512_arm_algs,
+				      ARRAY_SIZE(sha512_arm_algs));
+	if (err)
+		return err;
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
+		err = crypto_register_shashes(sha512_neon_algs,
+					      ARRAY_SIZE(sha512_neon_algs));
+		if (err)
+			goto err_unregister;
+	}
+	return 0;
+
+err_unregister:
+	crypto_unregister_shashes(sha512_arm_algs,
+				  ARRAY_SIZE(sha512_arm_algs));
+
+	return err;
+}
+
+static void __exit sha512_arm_mod_fini(void)
+{
+	crypto_unregister_shashes(sha512_arm_algs,
+				  ARRAY_SIZE(sha512_arm_algs));
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
+		crypto_unregister_shashes(sha512_neon_algs,
+					  ARRAY_SIZE(sha512_neon_algs));
+}
+
+module_init(sha512_arm_mod_init);
+module_exit(sha512_arm_mod_fini);
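
The scalar transform registers under the generic "sha384"/"sha512" algorithm names at priority 250 -- above the generic C implementation but below the NEON variant's 300 -- so the crypto core transparently picks the fastest usable backend at allocation time. A hypothetical in-kernel user (not part of this patch) never names a backend, only the algorithm:

	#include <crypto/hash.h>
	#include <crypto/sha.h>

	/* Illustrative sketch: digest a buffer with whichever "sha512"
	 * implementation won the priority selection. */
	static int sha512_digest_example(const u8 *data, unsigned int len,
					 u8 out[SHA512_DIGEST_SIZE])
	{
		struct crypto_shash *tfm;
		int err;

		tfm = crypto_alloc_shash("sha512", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		{
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return err;
	}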

+ 98 - 0
arch/arm/crypto/sha512-neon-glue.c

@@ -0,0 +1,98 @@
+/*
+ * sha512-neon-glue.c - accelerated SHA-384/512 for ARM NEON
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha512.h"
+
+MODULE_ALIAS_CRYPTO("sha384-neon");
+MODULE_ALIAS_CRYPTO("sha512-neon");
+
+asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src,
+					     int blocks);
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+			      unsigned int len)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	if (!may_use_simd() ||
+	    (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+		return sha512_arm_update(desc, data, len);
+
+	kernel_neon_begin();
+	sha512_base_do_update(desc, data, len,
+		(sha512_block_fn *)sha512_block_data_order_neon);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return sha512_arm_finup(desc, data, len, out);
+
+	kernel_neon_begin();
+	if (len)
+		sha512_base_do_update(desc, data, len,
+			(sha512_block_fn *)sha512_block_data_order_neon);
+	sha512_base_do_finalize(desc,
+		(sha512_block_fn *)sha512_block_data_order_neon);
+	kernel_neon_end();
+
+	return sha512_base_finish(desc, out);
+}
+
+static int sha512_neon_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_neon_finup(desc, NULL, 0, out);
+}
+
+struct shash_alg sha512_neon_algs[] = { {
+	.init			= sha384_base_init,
+	.update			= sha512_neon_update,
+	.final			= sha512_neon_final,
+	.finup			= sha512_neon_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha384",
+		.cra_driver_name	= "sha384-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA384_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+
+	}
+},  {
+	.init			= sha512_base_init,
+	.update			= sha512_neon_update,
+	.final			= sha512_neon_final,
+	.finup			= sha512_neon_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha512",
+		.cra_driver_name	= "sha512-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA512_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
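
The update/finup pair above follows the standard kernel-mode-NEON discipline: NEON is only legal where the SIMD register state can be preserved, so contexts where may_use_simd() is false (e.g. hard IRQ) fall back to the scalar sha512_arm_* routines, and every NEON region is bracketed by kernel_neon_begin()/kernel_neon_end(). The extra short-input check in update() avoids paying the SIMD save/restore cost when less than one 128-byte block would reach the NEON code anyway. Condensed, the pattern looks like this (a sketch with a hypothetical helper, not code from the patch):

	#include <linux/types.h>
	#include <asm/neon.h>
	#include <asm/simd.h>

	static void sha512_do_blocks(u64 *state, const u8 *src, int blocks,
				     void (*neon_fn)(u64 *, const u8 *, int),
				     void (*scalar_fn)(u64 *, const u8 *, int))
	{
		if (!may_use_simd()) {
			/* SIMD state cannot be saved here; use scalar code */
			scalar_fn(state, src, blocks);
			return;
		}

		kernel_neon_begin();
		neon_fn(state, src, blocks);
		kernel_neon_end();
	}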

+ 8 - 0
arch/arm/crypto/sha512.h

@@ -0,0 +1,8 @@
+
+int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+		      unsigned int len);
+
+int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+		     unsigned int len, u8 *out);
+
+extern struct shash_alg sha512_neon_algs[2];

+ 0 - 305
arch/arm/crypto/sha512_neon_glue.c

@@ -1,305 +0,0 @@
-/*
- * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
- * using NEON instructions.
- *
- * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This file is based on sha512_ssse3_glue.c:
- *   Copyright (C) 2013 Intel Corporation
- *   Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-
-
-static const u64 sha512_k[] = {
-	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
-	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
-	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
-	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
-	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
-	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
-	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
-	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
-	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
-	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
-	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
-	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
-	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
-	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
-	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
-	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
-	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
-	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
-	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
-	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
-	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
-	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
-	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
-	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
-	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
-	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
-	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
-	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
-	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
-	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
-	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
-	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
-	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
-	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
-	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
-	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
-	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
-	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
-	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
-	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
-};
-
-
-asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
-				      const u64 k[], unsigned int num_blks);
-
-
-static int sha512_neon_init(struct shash_desc *desc)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA512_H0;
-	sctx->state[1] = SHA512_H1;
-	sctx->state[2] = SHA512_H2;
-	sctx->state[3] = SHA512_H3;
-	sctx->state[4] = SHA512_H4;
-	sctx->state[5] = SHA512_H5;
-	sctx->state[6] = SHA512_H6;
-	sctx->state[7] = SHA512_H7;
-	sctx->count[0] = sctx->count[1] = 0;
-
-	return 0;
-}
-
-static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
-				unsigned int len, unsigned int partial)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count[0] += len;
-	if (sctx->count[0] < len)
-		sctx->count[1]++;
-
-	if (partial) {
-		done = SHA512_BLOCK_SIZE - partial;
-		memcpy(sctx->buf + partial, data, done);
-		sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
-	}
-
-	if (len - done >= SHA512_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
-
-		sha512_transform_neon(sctx->state, data + done, sha512_k,
-				      rounds);
-
-		done += rounds * SHA512_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buf, data + done, len - done);
-
-	return 0;
-}
-
-static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
-	int res;
-
-	/* Handle the fast case right here */
-	if (partial + len < SHA512_BLOCK_SIZE) {
-		sctx->count[0] += len;
-		if (sctx->count[0] < len)
-			sctx->count[1]++;
-		memcpy(sctx->buf + partial, data, len);
-
-		return 0;
-	}
-
-	if (!may_use_simd()) {
-		res = crypto_sha512_update(desc, data, len);
-	} else {
-		kernel_neon_begin();
-		res = __sha512_neon_update(desc, data, len, partial);
-		kernel_neon_end();
-	}
-
-	return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha512_neon_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be64 *dst = (__be64 *)out;
-	__be64 bits[2];
-	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
-	/* save number of bits */
-	bits[1] = cpu_to_be64(sctx->count[0] << 3);
-	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
-	/* Pad out to 112 mod 128 and append length */
-	index = sctx->count[0] & 0x7f;
-	padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
-	if (!may_use_simd()) {
-		crypto_sha512_update(desc, padding, padlen);
-		crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_neon_begin();
-		/* We need to fill a whole block for __sha512_neon_update() */
-		if (padlen <= 112) {
-			sctx->count[0] += padlen;
-			if (sctx->count[0] < padlen)
-				sctx->count[1]++;
-			memcpy(sctx->buf + index, padding, padlen);
-		} else {
-			__sha512_neon_update(desc, padding, padlen, index);
-		}
-		__sha512_neon_update(desc, (const u8 *)&bits,
-					sizeof(bits), 112);
-		kernel_neon_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_be64(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_neon_export(struct shash_desc *desc, void *out)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_neon_import(struct shash_desc *desc, const void *in)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha384_neon_init(struct shash_desc *desc)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA384_H0;
-	sctx->state[1] = SHA384_H1;
-	sctx->state[2] = SHA384_H2;
-	sctx->state[3] = SHA384_H3;
-	sctx->state[4] = SHA384_H4;
-	sctx->state[5] = SHA384_H5;
-	sctx->state[6] = SHA384_H6;
-	sctx->state[7] = SHA384_H7;
-
-	sctx->count[0] = sctx->count[1] = 0;
-
-	return 0;
-}
-
-static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
-{
-	u8 D[SHA512_DIGEST_SIZE];
-
-	sha512_neon_final(desc, D);
-
-	memcpy(hash, D, SHA384_DIGEST_SIZE);
-	memzero_explicit(D, SHA512_DIGEST_SIZE);
-
-	return 0;
-}
-
-static struct shash_alg algs[] = { {
-	.digestsize	=	SHA512_DIGEST_SIZE,
-	.init		=	sha512_neon_init,
-	.update		=	sha512_neon_update,
-	.final		=	sha512_neon_final,
-	.export		=	sha512_neon_export,
-	.import		=	sha512_neon_import,
-	.descsize	=	sizeof(struct sha512_state),
-	.statesize	=	sizeof(struct sha512_state),
-	.base		=	{
-		.cra_name	=	"sha512",
-		.cra_driver_name =	"sha512-neon",
-		.cra_priority	=	250,
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA512_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-},  {
-	.digestsize	=	SHA384_DIGEST_SIZE,
-	.init		=	sha384_neon_init,
-	.update		=	sha512_neon_update,
-	.final		=	sha384_neon_final,
-	.export		=	sha512_neon_export,
-	.import		=	sha512_neon_import,
-	.descsize	=	sizeof(struct sha512_state),
-	.statesize	=	sizeof(struct sha512_state),
-	.base		=	{
-		.cra_name	=	"sha384",
-		.cra_driver_name =	"sha384-neon",
-		.cra_priority	=	250,
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA384_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-} };
-
-static int __init sha512_neon_mod_init(void)
-{
-	if (!cpu_has_neon())
-		return -ENODEV;
-
-	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha512_neon_mod_fini(void)
-{
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_init(sha512_neon_mod_init);
-module_exit(sha512_neon_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
-
-MODULE_ALIAS_CRYPTO("sha512");
-MODULE_ALIAS_CRYPTO("sha384");

+ 1 - 1
arch/arm64/crypto/aes-ce-ccm-glue.c

@@ -13,7 +13,7 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
-#include <linux/crypto.h>
+#include <crypto/internal/aead.h>
 #include <linux/module.h>
 
 #include "aes-ce-setkey.h"

+ 4 - 4
arch/mips/cavium-octeon/crypto/octeon-md5.c

@@ -69,10 +69,10 @@ static int octeon_md5_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = cpu_to_le32(0x67452301);
-	mctx->hash[1] = cpu_to_le32(0xefcdab89);
-	mctx->hash[2] = cpu_to_le32(0x98badcfe);
-	mctx->hash[3] = cpu_to_le32(0x10325476);
+	mctx->hash[0] = cpu_to_le32(MD5_H0);
+	mctx->hash[1] = cpu_to_le32(MD5_H1);
+	mctx->hash[2] = cpu_to_le32(MD5_H2);
+	mctx->hash[3] = cpu_to_le32(MD5_H3);
 	mctx->byte_count = 0;
 
 	return 0;

+ 2 - 0
arch/nios2/kernel/time.c

@@ -8,6 +8,7 @@
  * for more details.
  */
 
+#include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
 #include <linux/clocksource.h>
@@ -106,6 +107,7 @@ cycles_t get_cycles(void)
 {
 	return nios2_timer_read(&nios2_cs.cs);
 }
+EXPORT_SYMBOL(get_cycles);
 
 static void nios2_timer_start(struct nios2_timer *timer)
 {

+ 4 - 4
arch/powerpc/crypto/md5-glue.c

@@ -37,10 +37,10 @@ static int ppc_md5_init(struct shash_desc *desc)
 {
 	struct md5_state *sctx = shash_desc_ctx(desc);
 
-	sctx->hash[0] = 0x67452301;
-	sctx->hash[1] = 0xefcdab89;
-	sctx->hash[2] = 0x98badcfe;
-	sctx->hash[3] =	0x10325476;
+	sctx->hash[0] = MD5_H0;
+	sctx->hash[1] = MD5_H1;
+	sctx->hash[2] = MD5_H2;
+	sctx->hash[3] =	MD5_H3;
 	sctx->byte_count = 0;
 
 	return 0;

+ 184 - 0
arch/powerpc/include/asm/icswx.h

@@ -0,0 +1,184 @@
+/*
+ * ICSWX api
+ *
+ * Copyright (C) 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This provides the Initiate Coprocessor Store Word Indexed (ICSWX)
+ * instruction.  This instruction is used to communicate with PowerPC
+ * coprocessors.  This also provides definitions of the structures used
+ * to communicate with the coprocessor.
+ *
+ * The RFC02130: Coprocessor Architecture document is the reference for
+ * everything in this file unless otherwise noted.
+ */
+#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+
+#include <asm/ppc-opcode.h> /* for PPC_ICSWX */
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE		(0x3fffffffffffffff)
+#define CCB_ADDRESS		(0xfffffffffffffff8)
+#define CCB_CM			(0x0000000000000007)
+#define CCB_CM0			(0x0000000000000004)
+#define CCB_CM12		(0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS	(0x0)
+#define CCB_CM0_LAST_IN_CHAIN	(0x4)
+#define CCB_CM12_STORE		(0x0)
+#define CCB_CM12_INTERRUPT	(0x1)
+
+#define CCB_SIZE		(0x10)
+#define CCB_ALIGN		CCB_SIZE
+
+struct coprocessor_completion_block {
+	__be64 value;
+	__be64 address;
+} __packed __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V			(0x80)
+#define CSB_F			(0x04)
+#define CSB_CH			(0x03)
+#define CSB_CE_INCOMPLETE	(0x80)
+#define CSB_CE_TERMINATION	(0x40)
+#define CSB_CE_TPBC		(0x20)
+
+#define CSB_CC_SUCCESS		(0)
+#define CSB_CC_INVALID_ALIGN	(1)
+#define CSB_CC_OPERAND_OVERLAP	(2)
+#define CSB_CC_DATA_LENGTH	(3)
+#define CSB_CC_TRANSLATION	(5)
+#define CSB_CC_PROTECTION	(6)
+#define CSB_CC_RD_EXTERNAL	(7)
+#define CSB_CC_INVALID_OPERAND	(8)
+#define CSB_CC_PRIVILEGE	(9)
+#define CSB_CC_INTERNAL		(10)
+#define CSB_CC_WR_EXTERNAL	(12)
+#define CSB_CC_NOSPC		(13)
+#define CSB_CC_EXCESSIVE_DDE	(14)
+#define CSB_CC_WR_TRANSLATION	(15)
+#define CSB_CC_WR_PROTECTION	(16)
+#define CSB_CC_UNKNOWN_CODE	(17)
+#define CSB_CC_ABORT		(18)
+#define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_SEGMENTED_DDL	(31)
+#define CSB_CC_PROGRESS_POINT	(32)
+#define CSB_CC_DDE_OVERFLOW	(33)
+#define CSB_CC_SESSION		(34)
+#define CSB_CC_PROVISION	(36)
+#define CSB_CC_CHAIN		(37)
+#define CSB_CC_SEQUENCE		(38)
+#define CSB_CC_HW		(39)
+
+#define CSB_SIZE		(0x10)
+#define CSB_ALIGN		CSB_SIZE
+
+struct coprocessor_status_block {
+	u8 flags;
+	u8 cs;
+	u8 cc;
+	u8 ce;
+	__be32 count;
+	__be64 address;
+} __packed __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P			(0x8000)
+
+#define DDE_SIZE		(0x10)
+#define DDE_ALIGN		DDE_SIZE
+
+struct data_descriptor_entry {
+	__be16 flags;
+	u8 count;
+	u8 index;
+	__be32 length;
+	__be64 address;
+} __packed __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE		(0x80)
+#define CRB_ALIGN		(0x100) /* Errata: requires 256 alignment */
+
+/* Coprocessor Status Block field
+ *   ADDRESS	address of CSB
+ *   C		CCB is valid
+ *   AT		0 = addrs are virtual, 1 = addrs are phys
+ *   M		enable perf monitor
+ */
+#define CRB_CSB_ADDRESS		(0xfffffffffffffff0)
+#define CRB_CSB_C		(0x0000000000000008)
+#define CRB_CSB_AT		(0x0000000000000002)
+#define CRB_CSB_M		(0x0000000000000001)
+
+struct coprocessor_request_block {
+	__be32 ccw;
+	__be32 flags;
+	__be64 csb_addr;
+
+	struct data_descriptor_entry source;
+	struct data_descriptor_entry target;
+
+	struct coprocessor_completion_block ccb;
+
+	u8 reserved[48];
+
+	struct coprocessor_status_block csb;
+} __packed __aligned(CRB_ALIGN);
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS			(0xff000000)
+#define CCW_CT			(0x00ff0000)
+#define CCW_CD			(0x0000ffff)
+#define CCW_CL			(0x0000c000)
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1 Initiate Coprocessor Store Word Indexed (ICSWX)
+ * Chapter 8.2.4.1 Condition Register 0
+ */
+
+#define ICSWX_INITIATED		(0x8)
+#define ICSWX_BUSY		(0x4)
+#define ICSWX_REJECTED		(0x2)
+
+static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb)
+{
+	__be64 ccw_reg = ccw;
+	u32 cr;
+
+	__asm__ __volatile__(
+	PPC_ICSWX(%1,0,%2) "\n"
+	"mfcr %0\n"
+	: "=r" (cr)
+	: "r" (ccw_reg), "r" (crb)
+	: "cr0", "memory");
+
+	return (int)((cr >> 28) & 0xf);
+}
+
+
+#endif /* _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ */
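Editor's note: everything a submitter needs is in this header — build a coprocessor_request_block, point csb_addr at a CSB the coprocessor can write back to, and issue icswx(); the CR0 nibble returned says whether the request was accepted. A hedged sketch of the calling convention; the coprocessor type (CT) value and retry policy below are invented for illustration, and real users such as the nx-842 driver obtain the CT from firmware properties:

	static int demo_icswx(struct coprocessor_request_block *crb,
			      struct coprocessor_status_block *csb)
	{
		u32 ccw = 42 << 16;	/* hypothetical CT, placed per CCW_CT */
		int tries = 10;
		int ret;

		/* Low bits of csb_addr are flags (CRB_CSB_C/AT/M); all
		 * clear here: virtual addressing, no CCB, no monitoring. */
		crb->csb_addr = cpu_to_be64((unsigned long)csb);

		do {
			ret = icswx(cpu_to_be32(ccw), crb);	/* CCW is BE */
		} while (ret == ICSWX_BUSY && --tries);

		return ret == ICSWX_INITIATED ? 0 : -EBUSY;
	}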

+ 13 - 0
arch/powerpc/include/asm/ppc-opcode.h

@@ -136,6 +136,8 @@
 #define PPC_INST_DCBAL			0x7c2005ec
 #define PPC_INST_DCBZL			0x7c2007ec
 #define PPC_INST_ICBT			0x7c00002c
+#define PPC_INST_ICSWX			0x7c00032d
+#define PPC_INST_ICSWEPX		0x7c00076d
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
@@ -403,4 +405,15 @@
 #define MFTMR(tmr, r)		stringify_in_c(.long PPC_INST_MFTMR | \
 					       TMRN(tmr) | ___PPC_RT(r))
 
+/* Coprocessor instructions */
+#define PPC_ICSWX(s, a, b)	stringify_in_c(.long PPC_INST_ICSWX |	\
+					       ___PPC_RS(s) |		\
+					       ___PPC_RA(a) |		\
+					       ___PPC_RB(b))
+#define PPC_ICSWEPX(s, a, b)	stringify_in_c(.long PPC_INST_ICSWEPX | \
+					       ___PPC_RS(s) |		\
+					       ___PPC_RA(a) |		\
+					       ___PPC_RB(b))
+
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
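Editor's note: PPC_ICSWX() emits the instruction as a raw .long, so the kernel builds even with an assembler that does not know the icswx mnemonic. The field placement comes from the existing ___PPC_RS/RA/RB helpers (shifts of 21, 16 and 11 bits). For example:

	/* PPC_ICSWX(5, 0, 6) expands, via stringify_in_c(), to the text:
	 *
	 *	.long 0x7c00032d | (5 << 21) | (0 << 16) | (6 << 11)
	 *
	 * i.e. RS=r5, RA=0, RB=r6 OR'd into the fixed opcode word.
	 */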

+ 1 - 0
arch/powerpc/kernel/prom.c

@@ -800,6 +800,7 @@ int of_get_ibm_chip_id(struct device_node *np)
 	}
 	return -1;
 }
+EXPORT_SYMBOL(of_get_ibm_chip_id);
 
 /**
  * cpu_to_chip_id - Return the cpus chip-id

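Editor's note: the export lets modular drivers resolve which chip a device node sits on; the consumer in this series is the reworked nx-842 driver. A hedged sketch of the call ("demo_chip_id" is a hypothetical wrapper; -1 means no ibm,chip-id property was found up the tree):

	static int demo_chip_id(struct device *dev)
	{
		int chip_id = of_get_ibm_chip_id(dev->of_node);

		if (chip_id == -1)
			dev_warn(dev, "no ibm,chip-id found\n");
		return chip_id;
	}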
+ 4 - 4
arch/sparc/crypto/md5_glue.c

@@ -33,10 +33,10 @@ static int md5_sparc64_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = cpu_to_le32(0x67452301);
-	mctx->hash[1] = cpu_to_le32(0xefcdab89);
-	mctx->hash[2] = cpu_to_le32(0x98badcfe);
-	mctx->hash[3] = cpu_to_le32(0x10325476);
+	mctx->hash[0] = cpu_to_le32(MD5_H0);
+	mctx->hash[1] = cpu_to_le32(MD5_H1);
+	mctx->hash[2] = cpu_to_le32(MD5_H2);
+	mctx->hash[3] = cpu_to_le32(MD5_H3);
 	mctx->byte_count = 0;
 
 	return 0;

+ 167 - 256
arch/x86/crypto/aesni-intel_glue.c

@@ -44,15 +44,19 @@
 #endif
 
 
+#define AESNI_ALIGN	16
+#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))
+#define RFC4106_HASH_SUBKEY_SIZE 16
+
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
  */
 struct aesni_rfc4106_gcm_ctx {
-	u8 hash_subkey[16];
-	struct crypto_aes_ctx aes_key_expanded;
+	u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	struct crypto_aes_ctx aes_key_expanded
+		__attribute__ ((__aligned__(AESNI_ALIGN)));
 	u8 nonce[4];
-	struct cryptd_aead *cryptd_tfm;
 };
 
 struct aesni_gcm_set_hash_subkey_result {
@@ -66,10 +70,6 @@ struct aesni_hash_subkey_req_data {
 	struct scatterlist sg;
 };
 
-#define AESNI_ALIGN	(16)
-#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
-#define RFC4106_HASH_SUBKEY_SIZE 16
-
 struct aesni_lrw_ctx {
 	struct lrw_table_ctx lrw_table;
 	u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
@@ -283,10 +283,11 @@ static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 {
-	return
-		(struct aesni_rfc4106_gcm_ctx *)
-		PTR_ALIGN((u8 *)
-		crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN);
+	unsigned long align = AESNI_ALIGN;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
 }
 #endif
 
@@ -790,36 +791,30 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 #endif
 
 #ifdef CONFIG_X86_64
-static int rfc4106_init(struct crypto_tfm *tfm)
+static int rfc4106_init(struct crypto_aead *aead)
 {
 	struct cryptd_aead *cryptd_tfm;
-	struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *)
-		PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
-	struct crypto_aead *cryptd_child;
-	struct aesni_rfc4106_gcm_ctx *child_ctx;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
 	cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
 				       CRYPTO_ALG_INTERNAL,
 				       CRYPTO_ALG_INTERNAL);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
 
-	cryptd_child = cryptd_aead_child(cryptd_tfm);
-	child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	memcpy(child_ctx, ctx, sizeof(*ctx));
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_aead.reqsize = sizeof(struct aead_request)
-		+ crypto_aead_reqsize(&cryptd_tfm->base);
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(
+		aead,
+		sizeof(struct aead_request) +
+		crypto_aead_reqsize(&cryptd_tfm->base));
 	return 0;
 }
 
-static void rfc4106_exit(struct crypto_tfm *tfm)
+static void rfc4106_exit(struct crypto_aead *aead)
 {
-	struct aesni_rfc4106_gcm_ctx *ctx =
-		(struct aesni_rfc4106_gcm_ctx *)
-		PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
-	if (!IS_ERR(ctx->cryptd_tfm))
-		cryptd_free_aead(ctx->cryptd_tfm);
-	return;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
 }
 
 static void
@@ -845,8 +840,6 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
 	if (IS_ERR(ctr_tfm))
 		return PTR_ERR(ctr_tfm);
 
-	crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
-
 	ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
 	if (ret)
 		goto out_free_ablkcipher;
@@ -895,73 +888,29 @@ out_free_ablkcipher:
 static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
 				  unsigned int key_len)
 {
-	int ret = 0;
-	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
-	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	/*Account for 4 byte nonce at the end.*/
 	key_len -= 4;
-	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256) {
-		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
 
 	memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
-	/*This must be on a 16 byte boundary!*/
-	if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN)
-		return -EINVAL;
-
-	if ((unsigned long)key % AESNI_ALIGN) {
-		/*key is not aligned: use an auxuliar aligned pointer*/
-		new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL);
-		if (!new_key_mem)
-			return -ENOMEM;
-
-		new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_align, key, key_len);
-		key = new_key_align;
-	}
 
-	if (!irq_fpu_usable())
-		ret = crypto_aes_expand_key(&(ctx->aes_key_expanded),
-		key, key_len);
-	else {
-		kernel_fpu_begin();
-		ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len);
-		kernel_fpu_end();
-	}
-	/*This must be on a 16 byte boundary!*/
-	if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) {
-		ret = -EINVAL;
-		goto exit;
-	}
-	ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
-exit:
-	kfree(new_key_mem);
-	return ret;
+	return aes_set_key_common(crypto_aead_tfm(aead),
+				  &ctx->aes_key_expanded, key, key_len) ?:
+	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
 }
 
 static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 			   unsigned int key_len)
 {
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
-	struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child);
-	struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm;
-	int ret;
+	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
+	struct cryptd_aead *cryptd_tfm = *ctx;
 
-	ret = crypto_aead_setkey(child, key, key_len);
-	if (!ret) {
-		memcpy(ctx, c_ctx, sizeof(*ctx));
-		ctx->cryptd_tfm = cryptd_tfm;
-	}
-	return ret;
+	return crypto_aead_setkey(&cryptd_tfm->base, key, key_len);
 }
 
 static int common_rfc4106_set_authsize(struct crypto_aead *aead,
@@ -975,7 +924,7 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead,
 	default:
 		return -EINVAL;
 	}
-	crypto_aead_crt(aead)->authsize = authsize;
+
 	return 0;
 }
 
@@ -984,30 +933,23 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead,
 static int rfc4106_set_authsize(struct crypto_aead *parent,
 				unsigned int authsize)
 {
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
-	struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
-	int ret;
+	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
+	struct cryptd_aead *cryptd_tfm = *ctx;
 
-	ret = crypto_aead_setauthsize(child, authsize);
-	if (!ret)
-		crypto_aead_crt(parent)->authsize = authsize;
-	return ret;
+	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
 }
 
-static int __driver_rfc4106_encrypt(struct aead_request *req)
+static int helper_rfc4106_encrypt(struct aead_request *req)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
 	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	u32 key_len = ctx->aes_key_expanded.key_length;
 	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv_tab[16+AESNI_ALIGN];
-	u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	struct scatter_walk src_sg_walk;
-	struct scatter_walk assoc_sg_walk;
 	struct scatter_walk dst_sg_walk;
 	unsigned int i;
 
@@ -1016,12 +958,6 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
 	/* to 8 or 12 bytes */
 	if (unlikely(req->assoclen != 8 && req->assoclen != 12))
 		return -EINVAL;
-	if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
-	        return -EINVAL;
-	if (unlikely(key_len != AES_KEYSIZE_128 &&
-	             key_len != AES_KEYSIZE_192 &&
-	             key_len != AES_KEYSIZE_256))
-	        return -EINVAL;
 
 	/* IV below built */
 	for (i = 0; i < 4; i++)
@@ -1030,55 +966,57 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
 		*(iv+4+i) = req->iv[i];
 	*((__be32 *)(iv+12)) = counter;
 
-	if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
+	if (sg_is_last(req->src) &&
+	    req->src->offset + req->src->length <= PAGE_SIZE &&
+	    sg_is_last(req->dst) &&
+	    req->dst->offset + req->dst->length <= PAGE_SIZE) {
 		one_entry_in_sg = 1;
 		scatterwalk_start(&src_sg_walk, req->src);
-		scatterwalk_start(&assoc_sg_walk, req->assoc);
-		src = scatterwalk_map(&src_sg_walk);
-		assoc = scatterwalk_map(&assoc_sg_walk);
+		assoc = scatterwalk_map(&src_sg_walk);
+		src = assoc + req->assoclen;
 		dst = src;
 		if (unlikely(req->src != req->dst)) {
 			scatterwalk_start(&dst_sg_walk, req->dst);
-			dst = scatterwalk_map(&dst_sg_walk);
+			dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
 		}
-
 	} else {
 		/* Allocate memory for src, dst, assoc */
-		src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
+		assoc = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
 			GFP_ATOMIC);
-		if (unlikely(!src))
+		if (unlikely(!assoc))
 			return -ENOMEM;
-		assoc = (src + req->cryptlen + auth_tag_len);
-		scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
-		scatterwalk_map_and_copy(assoc, req->assoc, 0,
-					req->assoclen, 0);
+		scatterwalk_map_and_copy(assoc, req->src, 0,
+					 req->assoclen + req->cryptlen, 0);
+		src = assoc + req->assoclen;
 		dst = src;
 	}
 
+	kernel_fpu_begin();
 	aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
 		ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
 		+ ((unsigned long)req->cryptlen), auth_tag_len);
+	kernel_fpu_end();
 
 	/* The authTag (aka the Integrity Check Value) needs to be written
 	 * back to the packet. */
 	if (one_entry_in_sg) {
 		if (unlikely(req->src != req->dst)) {
-			scatterwalk_unmap(dst);
-			scatterwalk_done(&dst_sg_walk, 0, 0);
+			scatterwalk_unmap(dst - req->assoclen);
+			scatterwalk_advance(&dst_sg_walk, req->dst->length);
+			scatterwalk_done(&dst_sg_walk, 1, 0);
 		}
-		scatterwalk_unmap(src);
 		scatterwalk_unmap(assoc);
-		scatterwalk_done(&src_sg_walk, 0, 0);
-		scatterwalk_done(&assoc_sg_walk, 0, 0);
+		scatterwalk_advance(&src_sg_walk, req->src->length);
+		scatterwalk_done(&src_sg_walk, req->src == req->dst, 0);
 	} else {
-		scatterwalk_map_and_copy(dst, req->dst, 0,
-			req->cryptlen + auth_tag_len, 1);
-		kfree(src);
+		scatterwalk_map_and_copy(dst, req->dst, req->assoclen,
+					 req->cryptlen + auth_tag_len, 1);
+		kfree(assoc);
 	}
 	return 0;
 }
 
-static int __driver_rfc4106_decrypt(struct aead_request *req)
+static int helper_rfc4106_decrypt(struct aead_request *req)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
@@ -1087,26 +1025,16 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 	int retval = 0;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	u32 key_len = ctx->aes_key_expanded.key_length;
 	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv_and_authTag[32+AESNI_ALIGN];
-	u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN);
-	u8 *authTag = iv + 16;
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
-	struct scatter_walk assoc_sg_walk;
 	struct scatter_walk dst_sg_walk;
 	unsigned int i;
 
-	if (unlikely((req->cryptlen < auth_tag_len) ||
-		(req->assoclen != 8 && req->assoclen != 12)))
+	if (unlikely(req->assoclen != 8 && req->assoclen != 12))
 		return -EINVAL;
-	if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
-	        return -EINVAL;
-	if (unlikely(key_len != AES_KEYSIZE_128 &&
-	             key_len != AES_KEYSIZE_192 &&
-	             key_len != AES_KEYSIZE_256))
-	        return -EINVAL;
 
 	/* Assuming we are supporting rfc4106 64-bit extended */
 	/* sequence numbers We need to have the AAD length */
@@ -1120,33 +1048,36 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 		*(iv+4+i) = req->iv[i];
 	*((__be32 *)(iv+12)) = counter;
 
-	if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) {
+	if (sg_is_last(req->src) &&
+	    req->src->offset + req->src->length <= PAGE_SIZE &&
+	    sg_is_last(req->dst) &&
+	    req->dst->offset + req->dst->length <= PAGE_SIZE) {
 		one_entry_in_sg = 1;
 		scatterwalk_start(&src_sg_walk, req->src);
-		scatterwalk_start(&assoc_sg_walk, req->assoc);
-		src = scatterwalk_map(&src_sg_walk);
-		assoc = scatterwalk_map(&assoc_sg_walk);
+		assoc = scatterwalk_map(&src_sg_walk);
+		src = assoc + req->assoclen;
 		dst = src;
 		if (unlikely(req->src != req->dst)) {
 			scatterwalk_start(&dst_sg_walk, req->dst);
-			dst = scatterwalk_map(&dst_sg_walk);
+			dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
 		}
 
 	} else {
 		/* Allocate memory for src, dst, assoc */
-		src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
-		if (!src)
+		assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
+		if (!assoc)
 			return -ENOMEM;
-		assoc = (src + req->cryptlen);
-		scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
-		scatterwalk_map_and_copy(assoc, req->assoc, 0,
-			req->assoclen, 0);
+		scatterwalk_map_and_copy(assoc, req->src, 0,
+					 req->assoclen + req->cryptlen, 0);
+		src = assoc + req->assoclen;
 		dst = src;
 	}
 
+	kernel_fpu_begin();
 	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
 		ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
 		authTag, auth_tag_len);
+	kernel_fpu_end();
 
 	/* Compare generated tag with passed in tag. */
 	retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ?
@@ -1154,90 +1085,59 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
 
 	if (one_entry_in_sg) {
 		if (unlikely(req->src != req->dst)) {
-			scatterwalk_unmap(dst);
-			scatterwalk_done(&dst_sg_walk, 0, 0);
+			scatterwalk_unmap(dst - req->assoclen);
+			scatterwalk_advance(&dst_sg_walk, req->dst->length);
+			scatterwalk_done(&dst_sg_walk, 1, 0);
 		}
-		scatterwalk_unmap(src);
 		scatterwalk_unmap(assoc);
-		scatterwalk_done(&src_sg_walk, 0, 0);
-		scatterwalk_done(&assoc_sg_walk, 0, 0);
+		scatterwalk_advance(&src_sg_walk, req->src->length);
+		scatterwalk_done(&src_sg_walk, req->src == req->dst, 0);
 	} else {
-		scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1);
-		kfree(src);
+		scatterwalk_map_and_copy(dst, req->dst, req->assoclen,
+					 tempCipherLen, 1);
+		kfree(assoc);
 	}
 	return retval;
 }
 
 static int rfc4106_encrypt(struct aead_request *req)
 {
-	int ret;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+	struct aead_request *subreq = aead_request_ctx(req);
 
-	if (!irq_fpu_usable()) {
-		struct aead_request *cryptd_req =
-			(struct aead_request *) aead_request_ctx(req);
+	aead_request_set_tfm(subreq, irq_fpu_usable() ?
+				     cryptd_aead_child(cryptd_tfm) :
+				     &cryptd_tfm->base);
 
-		memcpy(cryptd_req, req, sizeof(*req));
-		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		ret = crypto_aead_encrypt(cryptd_req);
-	} else {
-		kernel_fpu_begin();
-		ret = __driver_rfc4106_encrypt(req);
-		kernel_fpu_end();
-	}
-	return ret;
+	aead_request_set_callback(subreq, req->base.flags,
+				  req->base.complete, req->base.data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       req->cryptlen, req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	return crypto_aead_encrypt(subreq);
 }
 
 static int rfc4106_decrypt(struct aead_request *req)
 {
-	int ret;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+	struct aead_request *subreq = aead_request_ctx(req);
 
-	if (!irq_fpu_usable()) {
-		struct aead_request *cryptd_req =
-			(struct aead_request *) aead_request_ctx(req);
+	aead_request_set_tfm(subreq, irq_fpu_usable() ?
+				     cryptd_aead_child(cryptd_tfm) :
+				     &cryptd_tfm->base);
 
-		memcpy(cryptd_req, req, sizeof(*req));
-		aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		ret = crypto_aead_decrypt(cryptd_req);
-	} else {
-		kernel_fpu_begin();
-		ret = __driver_rfc4106_decrypt(req);
-		kernel_fpu_end();
-	}
-	return ret;
-}
-
-static int helper_rfc4106_encrypt(struct aead_request *req)
-{
-	int ret;
-
-	if (unlikely(!irq_fpu_usable())) {
-		WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
-		ret = -EINVAL;
-	} else {
-		kernel_fpu_begin();
-		ret = __driver_rfc4106_encrypt(req);
-		kernel_fpu_end();
-	}
-	return ret;
-}
-
-static int helper_rfc4106_decrypt(struct aead_request *req)
-{
-	int ret;
+	aead_request_set_callback(subreq, req->base.flags,
+				  req->base.complete, req->base.data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       req->cryptlen, req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
 
-	if (unlikely(!irq_fpu_usable())) {
-		WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
-		ret = -EINVAL;
-	} else {
-		kernel_fpu_begin();
-		ret = __driver_rfc4106_decrypt(req);
-		kernel_fpu_end();
-	}
-	return ret;
+	return crypto_aead_decrypt(subreq);
 }
 #endif
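Editor's note: after this conversion the per-tfm context is just a pointer to the cryptd AEAD, and every request is forwarded as a subrequest — to the synchronous internal child when the FPU is usable, to the cryptd workqueue-backed instance otherwise. A condensed restatement of that forwarding pattern (names are the ones from the surrounding code; nothing new is introduced):

	static int demo_fwd(struct aead_request *req, bool encrypt)
	{
		struct crypto_aead *tfm = crypto_aead_reqtfm(req);
		struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
		struct aead_request *subreq = aead_request_ctx(req);

		/* SIMD usable: call the internal child directly;
		 * otherwise defer to cryptd. */
		aead_request_set_tfm(subreq, irq_fpu_usable() ?
				     cryptd_aead_child(*ctx) : &(*ctx)->base);
		aead_request_set_callback(subreq, req->base.flags,
					  req->base.complete, req->base.data);
		aead_request_set_crypt(subreq, req->src, req->dst,
				       req->cryptlen, req->iv);
		aead_request_set_ad(subreq, req->assoclen);

		return encrypt ? crypto_aead_encrypt(subreq) :
				 crypto_aead_decrypt(subreq);
	}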
 
@@ -1410,51 +1310,6 @@ static struct crypto_alg aesni_algs[] = { {
 			.geniv		= "chainiv",
 		},
 	},
-}, {
-	.cra_name		= "__gcm-aes-aesni",
-	.cra_driver_name	= "__driver-gcm-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct aesni_rfc4106_gcm_ctx) +
-				  AESNI_ALIGN,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_aead_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.aead = {
-			.setkey		= common_rfc4106_set_key,
-			.setauthsize	= common_rfc4106_set_authsize,
-			.encrypt	= helper_rfc4106_encrypt,
-			.decrypt	= helper_rfc4106_decrypt,
-			.ivsize		= 8,
-			.maxauthsize	= 16,
-		},
-	},
-}, {
-	.cra_name		= "rfc4106(gcm(aes))",
-	.cra_driver_name	= "rfc4106-gcm-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct aesni_rfc4106_gcm_ctx) +
-				  AESNI_ALIGN,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_nivaead_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= rfc4106_init,
-	.cra_exit		= rfc4106_exit,
-	.cra_u = {
-		.aead = {
-			.setkey		= rfc4106_set_key,
-			.setauthsize	= rfc4106_set_authsize,
-			.encrypt	= rfc4106_encrypt,
-			.decrypt	= rfc4106_decrypt,
-			.geniv		= "seqiv",
-			.ivsize		= 8,
-			.maxauthsize	= 16,
-		},
-	},
 #endif
 #if IS_ENABLED(CONFIG_CRYPTO_PCBC)
 }, {
@@ -1569,6 +1424,46 @@ static struct crypto_alg aesni_algs[] = { {
 	},
 } };
 
+#ifdef CONFIG_X86_64
+static struct aead_alg aesni_aead_algs[] = { {
+	.setkey			= common_rfc4106_set_key,
+	.setauthsize		= common_rfc4106_set_authsize,
+	.encrypt		= helper_rfc4106_encrypt,
+	.decrypt		= helper_rfc4106_decrypt,
+	.ivsize			= 8,
+	.maxauthsize		= 16,
+	.base = {
+		.cra_name		= "__gcm-aes-aesni",
+		.cra_driver_name	= "__driver-gcm-aes-aesni",
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct aesni_rfc4106_gcm_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init			= rfc4106_init,
+	.exit			= rfc4106_exit,
+	.setkey			= rfc4106_set_key,
+	.setauthsize		= rfc4106_set_authsize,
+	.encrypt		= rfc4106_encrypt,
+	.decrypt		= rfc4106_decrypt,
+	.ivsize			= 8,
+	.maxauthsize		= 16,
+	.base = {
+		.cra_name		= "rfc4106(gcm(aes))",
+		.cra_driver_name	= "rfc4106-gcm-aesni",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct cryptd_aead *),
+		.cra_module		= THIS_MODULE,
+	},
+} };
+#else
+static struct aead_alg aesni_aead_algs[0];
+#endif
+
 
 static const struct x86_cpu_id aesni_cpu_id[] = {
 	X86_FEATURE_MATCH(X86_FEATURE_AES),
@@ -1616,11 +1511,27 @@ static int __init aesni_init(void)
 	if (err)
 		return err;
 
-	return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+	err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+	if (err)
+		goto fpu_exit;
+
+	err = crypto_register_aeads(aesni_aead_algs,
+				    ARRAY_SIZE(aesni_aead_algs));
+	if (err)
+		goto unregister_algs;
+
+	return err;
+
+unregister_algs:
+	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
+fpu_exit:
+	crypto_fpu_exit();
+	return err;
 }
 
 static void __exit aesni_exit(void)
 {
+	crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 
 	crypto_fpu_exit();
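Editor's note: two things worth pulling out of this conversion. First, the new AEAD layout has no separate req->assoc — the associated data is the first req->assoclen bytes of both src and dst, which is why the fast path above maps a single scatterlist and computes src = assoc + req->assoclen. Second, aesni_init() now unwinds its registrations in reverse order on failure. A hedged caller-side sketch of the single-scatterlist layout (buffer is [AD | text | tag]; sizes and error handling are trimmed, and a real caller must also handle async -EINPROGRESS completion):

	#include <crypto/aead.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int demo_seal(struct crypto_aead *tfm, void *buf,
			     unsigned int assoclen, unsigned int textlen)
	{
		struct scatterlist sg;
		struct aead_request *req;
		u8 iv[8] = { 0 };	/* rfc4106 ivsize is 8 */
		int err;

		sg_init_one(&sg, buf, assoclen + textlen +
			    crypto_aead_authsize(tfm));
		req = aead_request_alloc(tfm, GFP_KERNEL);
		if (!req)
			return -ENOMEM;

		aead_request_set_callback(req, 0, NULL, NULL);
		aead_request_set_crypt(req, &sg, &sg, textlen, iv);
		aead_request_set_ad(req, assoclen);
		err = crypto_aead_encrypt(req);	/* sketch: treated as sync */

		aead_request_free(req);
		return err;
	}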

+ 1 - 1
arch/x86/crypto/fpu.c

@@ -156,7 +156,7 @@ int __init crypto_fpu_init(void)
 	return crypto_register_template(&crypto_fpu_tmpl);
 }
 
-void __exit crypto_fpu_exit(void)
+void crypto_fpu_exit(void)
 {
 	crypto_unregister_template(&crypto_fpu_tmpl);
 }
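Editor's note: dropping __exit is not cosmetic — aesni_init() above now calls crypto_fpu_exit() on its error path, and for built-in code the .exit.text section may be discarded, so an __exit function must not be reachable from __init. The shape being avoided:

	/* Broken pattern (before this hunk):
	 *
	 *	void __exit helper_cleanup(void);	// may be discarded
	 *
	 *	static int __init foo_init(void)
	 *	{
	 *		...
	 *		if (err)
	 *			helper_cleanup();	// call into dropped text
	 *	}
	 *
	 * Removing __exit keeps the helper in normal .text, so the init
	 * error path can safely reach it.
	 */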

+ 2 - 1
arch/x86/crypto/sha-mb/sha1_mb.c

@@ -882,7 +882,8 @@ static int __init sha1_mb_mod_init(void)
 		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
 		cpu_state->cpu = cpu;
 		cpu_state->alg_state = &sha1_mb_alg_state;
-		cpu_state->mgr = (struct sha1_ctx_mgr *) kzalloc(sizeof(struct sha1_ctx_mgr), GFP_KERNEL);
+		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
+					GFP_KERNEL);
 		if (!cpu_state->mgr)
 			goto err2;
 		sha1_ctx_mgr_init(cpu_state->mgr);
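Editor's note: a standard cleanup — in C the void * returned by kzalloc() converts implicitly, so the cast was pure noise (and the long line a style violation). The usual kernel idiom goes one step further and sizes from the pointer:

	cpu_state->mgr = kzalloc(sizeof(*cpu_state->mgr), GFP_KERNEL);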

+ 37 - 137
crypto/842.c

@@ -1,5 +1,5 @@
 /*
- * Cryptographic API for the 842 compression algorithm.
+ * Cryptographic API for the 842 software compression algorithm.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -11,173 +11,73 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * Copyright (C) IBM Corporation, 2011-2015
  *
- * Copyright (C) IBM Corporation, 2011
+ * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
  *
- * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
- *          Seth Jennings <sjenning@linux.vnet.ibm.com>
+ * Rewrite: Dan Streetman <ddstreet@ieee.org>
+ *
+ * This is the software implementation of compression and decompression using
+ * the 842 format.  This uses the software 842 library at lib/842/ which is
+ * only a reference implementation, and is very, very slow as compared to other
+ * software compressors.  You probably do not want to use this software
+ * compression.  If you have access to the PowerPC 842 compression hardware, you
+ * want to use the 842 hardware compression interface, which is at:
+ * drivers/crypto/nx/nx-842-crypto.c
  */
 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/crypto.h>
-#include <linux/vmalloc.h>
-#include <linux/nx842.h>
-#include <linux/lzo.h>
-#include <linux/timer.h>
-
-static int nx842_uselzo;
-
-struct nx842_ctx {
-	void *nx842_wmem; /* working memory for 842/lzo */
-};
+#include <linux/sw842.h>
 
-enum nx842_crypto_type {
-	NX842_CRYPTO_TYPE_842,
-	NX842_CRYPTO_TYPE_LZO
+struct crypto842_ctx {
+	char wmem[SW842_MEM_COMPRESS];	/* working memory for compress */
 };
 
-#define NX842_SENTINEL 0xdeadbeef
-
-struct nx842_crypto_header {
-	unsigned int sentinel; /* debug */
-	enum nx842_crypto_type type;
-};
-
-static int nx842_init(struct crypto_tfm *tfm)
-{
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-	int wmemsize;
-
-	wmemsize = max_t(int, nx842_get_workmem_size(), LZO1X_MEM_COMPRESS);
-	ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS);
-	if (!ctx->nx842_wmem)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void nx842_exit(struct crypto_tfm *tfm)
-{
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	kfree(ctx->nx842_wmem);
-}
-
-static void nx842_reset_uselzo(unsigned long data)
+static int crypto842_compress(struct crypto_tfm *tfm,
+			      const u8 *src, unsigned int slen,
+			      u8 *dst, unsigned int *dlen)
 {
-	nx842_uselzo = 0;
-}
-
-static DEFINE_TIMER(failover_timer, nx842_reset_uselzo, 0, 0);
-
-static int nx842_crypto_compress(struct crypto_tfm *tfm, const u8 *src,
-			    unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct nx842_crypto_header *hdr;
-	unsigned int tmp_len = *dlen;
-	size_t lzodlen; /* needed for lzo */
-	int err;
-
-	*dlen = 0;
-	hdr = (struct nx842_crypto_header *)dst;
-	hdr->sentinel = NX842_SENTINEL; /* debug */
-	dst += sizeof(struct nx842_crypto_header);
-	tmp_len -= sizeof(struct nx842_crypto_header);
-	lzodlen = tmp_len;
-
-	if (likely(!nx842_uselzo)) {
-		err = nx842_compress(src, slen, dst, &tmp_len, ctx->nx842_wmem);
-
-		if (likely(!err)) {
-			hdr->type = NX842_CRYPTO_TYPE_842;
-			*dlen = tmp_len + sizeof(struct nx842_crypto_header);
-			return 0;
-		}
-
-		/* hardware failed */
-		nx842_uselzo = 1;
+	struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
 
-		/* set timer to check for hardware again in 1 second */
-		mod_timer(&failover_timer, jiffies + msecs_to_jiffies(1000));
-	}
-
-	/* no hardware, use lzo */
-	err = lzo1x_1_compress(src, slen, dst, &lzodlen, ctx->nx842_wmem);
-	if (err != LZO_E_OK)
-		return -EINVAL;
-
-	hdr->type = NX842_CRYPTO_TYPE_LZO;
-	*dlen = lzodlen + sizeof(struct nx842_crypto_header);
-	return 0;
+	return sw842_compress(src, slen, dst, dlen, ctx->wmem);
 }
 
-static int nx842_crypto_decompress(struct crypto_tfm *tfm, const u8 *src,
-			      unsigned int slen, u8 *dst, unsigned int *dlen)
+static int crypto842_decompress(struct crypto_tfm *tfm,
+				const u8 *src, unsigned int slen,
+				u8 *dst, unsigned int *dlen)
 {
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct nx842_crypto_header *hdr;
-	unsigned int tmp_len = *dlen;
-	size_t lzodlen; /* needed for lzo */
-	int err;
-
-	*dlen = 0;
-	hdr = (struct nx842_crypto_header *)src;
-
-	if (unlikely(hdr->sentinel != NX842_SENTINEL))
-		return -EINVAL;
-
-	src += sizeof(struct nx842_crypto_header);
-	slen -= sizeof(struct nx842_crypto_header);
-
-	if (likely(hdr->type == NX842_CRYPTO_TYPE_842)) {
-		err = nx842_decompress(src, slen, dst, &tmp_len,
-			ctx->nx842_wmem);
-		if (err)
-			return -EINVAL;
-		*dlen = tmp_len;
-	} else if (hdr->type == NX842_CRYPTO_TYPE_LZO) {
-		lzodlen = tmp_len;
-		err = lzo1x_decompress_safe(src, slen, dst, &lzodlen);
-		if (err != LZO_E_OK)
-			return -EINVAL;
-		*dlen = lzodlen;
-	} else
-		return -EINVAL;
-
-	return 0;
+	return sw842_decompress(src, slen, dst, dlen);
 }
 
 static struct crypto_alg alg = {
 	.cra_name		= "842",
+	.cra_driver_name	= "842-generic",
+	.cra_priority		= 100,
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
-	.cra_ctxsize		= sizeof(struct nx842_ctx),
+	.cra_ctxsize		= sizeof(struct crypto842_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_init		= nx842_init,
-	.cra_exit		= nx842_exit,
 	.cra_u			= { .compress = {
-	.coa_compress		= nx842_crypto_compress,
-	.coa_decompress		= nx842_crypto_decompress } }
+	.coa_compress		= crypto842_compress,
+	.coa_decompress		= crypto842_decompress } }
 };
 
-static int __init nx842_mod_init(void)
+static int __init crypto842_mod_init(void)
 {
-	del_timer(&failover_timer);
 	return crypto_register_alg(&alg);
 }
+module_init(crypto842_mod_init);
 
-static void __exit nx842_mod_exit(void)
+static void __exit crypto842_mod_exit(void)
 {
 	crypto_unregister_alg(&alg);
 }
-
-module_init(nx842_mod_init);
-module_exit(nx842_mod_exit);
+module_exit(crypto842_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("842 Compression Algorithm");
+MODULE_DESCRIPTION("842 Software Compression Algorithm");
 MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-generic");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
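Editor's note: with cra_driver_name "842-generic" at priority 100, a plain request for "842" resolves to the best available provider — this slow software reference when nothing else is loaded, or the powerpc nx hardware driver (registered at a higher priority) when present. A hedged sketch of a caller using the compression API:

	#include <linux/crypto.h>
	#include <linux/err.h>

	static int demo_842(const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen)
	{
		struct crypto_comp *tfm;
		int err;

		/* Picks the highest-priority "842" implementation. */
		tfm = crypto_alloc_comp("842", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_comp_compress(tfm, src, slen, dst, dlen);
		crypto_free_comp(tfm);
		return err;
	}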

+ 95 - 11
crypto/Kconfig

@@ -78,6 +78,10 @@ config CRYPTO_RNG2
 	tristate
 	select CRYPTO_ALGAPI2
 
+config CRYPTO_RNG_DEFAULT
+	tristate
+	select CRYPTO_DRBG_MENU
+
 config CRYPTO_PCOMP
 	tristate
 	select CRYPTO_PCOMP2
@@ -87,6 +91,23 @@ config CRYPTO_PCOMP2
 	tristate
 	select CRYPTO_ALGAPI2
 
+config CRYPTO_AKCIPHER2
+	tristate
+	select CRYPTO_ALGAPI2
+
+config CRYPTO_AKCIPHER
+	tristate
+	select CRYPTO_AKCIPHER2
+	select CRYPTO_ALGAPI
+
+config CRYPTO_RSA
+	tristate "RSA algorithm"
+	select CRYPTO_AKCIPHER
+	select MPILIB
+	select ASN1
+	help
+	  Generic implementation of the RSA public key algorithm.
+
 config CRYPTO_MANAGER
 	tristate "Cryptographic algorithm manager"
 	select CRYPTO_MANAGER2
@@ -100,6 +121,7 @@ config CRYPTO_MANAGER2
 	select CRYPTO_HASH2
 	select CRYPTO_BLKCIPHER2
 	select CRYPTO_PCOMP2
+	select CRYPTO_AKCIPHER2
 
 config CRYPTO_USER
 	tristate "Userspace cryptographic algorithm configuration"
@@ -217,15 +239,39 @@ config CRYPTO_GCM
 	  Support for Galois/Counter Mode (GCM) and Galois Message
 	  Authentication Code (GMAC). Required for IPSec.
 
+config CRYPTO_CHACHA20POLY1305
+	tristate "ChaCha20-Poly1305 AEAD support"
+	select CRYPTO_CHACHA20
+	select CRYPTO_POLY1305
+	select CRYPTO_AEAD
+	help
+	  ChaCha20-Poly1305 AEAD support, RFC7539.
+
+	  Support for the AEAD wrapper using the ChaCha20 stream cipher combined
+	  with the Poly1305 authenticator. It is defined in RFC7539 for use in
+	  IETF protocols.
+
 config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_RNG
+	select CRYPTO_NULL
+	select CRYPTO_RNG_DEFAULT
 	help
 	  This IV generator generates an IV based on a sequence number by
 	  xoring it with a salt.  This algorithm is mainly useful for CTR
 
+config CRYPTO_ECHAINIV
+	tristate "Encrypted Chain IV Generator"
+	select CRYPTO_AEAD
+	select CRYPTO_NULL
+	select CRYPTO_RNG_DEFAULT
+	default m
+	help
+	  This IV generator generates an IV based on the encryption of
+	  a sequence number xored with a salt.  This is the default
+	  algorithm for CBC.
+
 comment "Block modes"
 
 config CRYPTO_CBC
@@ -415,6 +461,15 @@ config CRYPTO_GHASH
 	help
 	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
 
+config CRYPTO_POLY1305
+	tristate "Poly1305 authenticator algorithm"
+	help
+	  Poly1305 authenticator algorithm, RFC7539.
+
+	  Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
+	  It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
+	  in IETF protocols. This is the portable C implementation of Poly1305.
+
 config CRYPTO_MD4
 	tristate "MD4 digest algorithm"
 	select CRYPTO_HASH
@@ -1145,6 +1200,19 @@ config CRYPTO_SALSA20_X86_64
 	  The Salsa20 stream cipher algorithm is designed by Daniel J.
 	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
+config CRYPTO_CHACHA20
+	tristate "ChaCha20 cipher algorithm"
+	select CRYPTO_BLKCIPHER
+	help
+	  ChaCha20 cipher algorithm, RFC7539.
+
+	  ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
+	  Bernstein and further specified in RFC7539 for use in IETF protocols.
+	  This is the portable C implementation of ChaCha20.
+
+	  See also:
+	  <http://cr.yp.to/chacha/chacha-20080128.pdf>
+
 config CRYPTO_SEED
 	tristate "SEED cipher algorithm"
 	select CRYPTO_ALGAPI
@@ -1412,10 +1480,9 @@ config CRYPTO_LZO
 
 config CRYPTO_842
 	tristate "842 compression algorithm"
-	depends on CRYPTO_DEV_NX_COMPRESS
-	# 842 uses lzo if the hardware becomes unavailable
-	select LZO_COMPRESS
-	select LZO_DECOMPRESS
+	select CRYPTO_ALGAPI
+	select 842_COMPRESS
+	select 842_DECOMPRESS
 	help
 	  This is the 842 algorithm.
 
@@ -1439,7 +1506,6 @@ comment "Random Number Generation"
 
 config CRYPTO_ANSI_CPRNG
 	tristate "Pseudo Random Number Generation for Cryptographic modules"
-	default m
 	select CRYPTO_AES
 	select CRYPTO_RNG
 	help
@@ -1457,15 +1523,14 @@ menuconfig CRYPTO_DRBG_MENU
 if CRYPTO_DRBG_MENU
 
 config CRYPTO_DRBG_HMAC
-	bool "Enable HMAC DRBG"
+	bool
 	default y
 	select CRYPTO_HMAC
-	help
-	  Enable the HMAC DRBG variant as defined in NIST SP800-90A.
+	select CRYPTO_SHA256
 
 config CRYPTO_DRBG_HASH
 	bool "Enable Hash DRBG"
-	select CRYPTO_HASH
+	select CRYPTO_SHA256
 	help
 	  Enable the Hash DRBG variant as defined in NIST SP800-90A.
 
@@ -1477,11 +1542,21 @@ config CRYPTO_DRBG_CTR
 
 config CRYPTO_DRBG
 	tristate
-	default CRYPTO_DRBG_MENU if (CRYPTO_DRBG_HMAC || CRYPTO_DRBG_HASH || CRYPTO_DRBG_CTR)
+	default CRYPTO_DRBG_MENU
 	select CRYPTO_RNG
+	select CRYPTO_JITTERENTROPY
 
 endif	# if CRYPTO_DRBG_MENU
 
+config CRYPTO_JITTERENTROPY
+	tristate "Jitterentropy Non-Deterministic Random Number Generator"
+	help
+	  The Jitterentropy RNG is a noise that is intended
+	  to provide seed to another RNG. The RNG does not
+	  perform any cryptographic whitening of the generated
+	  random numbers. This Jitterentropy RNG registers with
+	  the kernel crypto API and can be used by any caller.
+
 config CRYPTO_USER_API
 	tristate
 
@@ -1512,6 +1587,15 @@ config CRYPTO_USER_API_RNG
 	  This option enables the user-spaces interface for random
 	  number generator algorithms.
 
+config CRYPTO_USER_API_AEAD
+	tristate "User-space interface for AEAD cipher algorithms"
+	depends on NET
+	select CRYPTO_AEAD
+	select CRYPTO_USER_API
+	help
+	  This option enables the user-spaces interface for AEAD
+	  cipher algorithms.
+
 config CRYPTO_HASH_INFO
 	bool
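Editor's note: the new CHACHA20, POLY1305 and CHACHA20POLY1305 options combine into the RFC7539 AEAD; once enabled, kernel code reaches it purely by name. A hedged sketch (256-bit key; the rfc7539 instance uses a 12-byte nonce):

	#include <crypto/aead.h>
	#include <linux/err.h>

	static struct crypto_aead *demo_alloc_chachapoly(const u8 key[32])
	{
		struct crypto_aead *tfm;

		tfm = crypto_alloc_aead("rfc7539(chacha20,poly1305)", 0, 0);
		if (IS_ERR(tfm))
			return tfm;

		if (crypto_aead_setkey(tfm, key, 32)) {
			crypto_free_aead(tfm);
			return ERR_PTR(-EINVAL);
		}
		return tfm;
	}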
 

+ 14 - 1
crypto/Makefile

@@ -21,12 +21,22 @@ obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o
 obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o
 obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o
 obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
+obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o
 
 crypto_hash-y += ahash.o
 crypto_hash-y += shash.o
 obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o
 
 obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o
+obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o
+
+$(obj)/rsakey-asn1.o: $(obj)/rsakey-asn1.c $(obj)/rsakey-asn1.h
+clean-files += rsakey-asn1.c rsakey-asn1.h
+
+rsa_generic-y := rsakey-asn1.o
+rsa_generic-y += rsa.o
+rsa_generic-y += rsa_helper.o
+obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o
 
 cryptomgr-y := algboss.o testmgr.o
 
@@ -58,6 +68,7 @@ obj-$(CONFIG_CRYPTO_XTS) += xts.o
 obj-$(CONFIG_CRYPTO_CTR) += ctr.o
 obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
+obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
@@ -79,6 +90,8 @@ obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
 obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
 obj-$(CONFIG_CRYPTO_SEED) += seed.o
 obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
+obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
@@ -91,9 +104,9 @@ obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
-obj-$(CONFIG_CRYPTO_RNG2) += krng.o
 obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o
 obj-$(CONFIG_CRYPTO_DRBG) += drbg.o
+obj-$(CONFIG_CRYPTO_JITTERENTROPY) += jitterentropy.o
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
 obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
 obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o

+ 10 - 2
crypto/ablkcipher.c

@@ -454,7 +454,7 @@ static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type,
 		      alg->setkey : setkey;
 	crt->encrypt = alg->encrypt;
 	crt->decrypt = alg->decrypt;
-	crt->givencrypt = alg->givencrypt;
+	crt->givencrypt = alg->givencrypt ?: no_givdecrypt;
 	crt->givdecrypt = alg->givdecrypt ?: no_givdecrypt;
 	crt->base = __crypto_ablkcipher_cast(tfm);
 	crt->ivsize = alg->ivsize;
@@ -586,6 +586,13 @@ static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask)
 	if (!tmpl)
 		goto kill_larval;
 
+	if (tmpl->create) {
+		err = tmpl->create(tmpl, tb);
+		if (err)
+			goto put_tmpl;
+		goto ok;
+	}
+
 	inst = tmpl->alloc(tb);
 	err = PTR_ERR(inst);
 	if (IS_ERR(inst))
@@ -597,6 +604,7 @@ static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask)
 		goto put_tmpl;
 	}
 
+ok:
 	/* Redo the lookup to use the instance we just registered. */
 	err = -EAGAIN;
 
@@ -636,7 +644,7 @@ struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask)
 
 	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
 	    CRYPTO_ALG_TYPE_GIVCIPHER) {
-		if ((alg->cra_flags ^ type ^ ~mask) & CRYPTO_ALG_TESTED) {
+		if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) {
 			crypto_mod_put(alg);
 			alg = ERR_PTR(-ENOENT);
 		}
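Editor's note: the lookup fix deserves unpacking. The old XOR chain rejected a candidate whenever its CRYPTO_ALG_TESTED bit differed from the requested value, so a lookup that set TESTED in the mask but not in the type (one willing to take an untested algorithm) would spuriously refuse a tested one. The new expression rejects only an untested algorithm when the caller actually demands a tested one. Reduced to the single bit:

	#define TESTED 1u

	static unsigned int old_reject(unsigned int flags, unsigned int type,
				       unsigned int mask)
	{
		return (flags ^ type ^ ~mask) & TESTED;
	}

	static unsigned int new_reject(unsigned int flags, unsigned int type,
				       unsigned int mask)
	{
		return ~flags & (type ^ ~mask) & TESTED;
	}

	/* flags=TESTED, type=0, mask=TESTED (caller tolerates untested):
	 * old_reject() != 0 (spurious ENOENT), new_reject() == 0 (accepted).
	 * For an untested alg with a caller requiring TESTED, both reject.
	 */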

+ 530 - 154
crypto/aead.c

@@ -12,7 +12,8 @@
  *
  */
 
-#include <crypto/internal/aead.h>
+#include <crypto/internal/geniv.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -26,10 +27,20 @@
 
 #include "internal.h"
 
+struct compat_request_ctx {
+	struct scatterlist src[2];
+	struct scatterlist dst[2];
+	struct scatterlist ivbuf[2];
+	struct scatterlist *ivsg;
+	struct aead_givcrypt_request subreq;
+};
+
+static int aead_null_givencrypt(struct aead_givcrypt_request *req);
+static int aead_null_givdecrypt(struct aead_givcrypt_request *req);
+
 static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key,
 			    unsigned int keylen)
 {
-	struct aead_alg *aead = crypto_aead_alg(tfm);
 	unsigned long alignmask = crypto_aead_alignmask(tfm);
 	int ret;
 	u8 *buffer, *alignbuffer;
@@ -42,47 +53,95 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key,
 
 	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 	memcpy(alignbuffer, key, keylen);
-	ret = aead->setkey(tfm, alignbuffer, keylen);
+	ret = tfm->setkey(tfm, alignbuffer, keylen);
 	memset(alignbuffer, 0, keylen);
 	kfree(buffer);
 	return ret;
 }
 
-static int setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
+int crypto_aead_setkey(struct crypto_aead *tfm,
+		       const u8 *key, unsigned int keylen)
 {
-	struct aead_alg *aead = crypto_aead_alg(tfm);
 	unsigned long alignmask = crypto_aead_alignmask(tfm);
 
+	tfm = tfm->child;
+
 	if ((unsigned long)key & alignmask)
 		return setkey_unaligned(tfm, key, keylen);
 
-	return aead->setkey(tfm, key, keylen);
+	return tfm->setkey(tfm, key, keylen);
 }
+EXPORT_SYMBOL_GPL(crypto_aead_setkey);
 
 int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 {
-	struct aead_tfm *crt = crypto_aead_crt(tfm);
 	int err;
 
-	if (authsize > crypto_aead_alg(tfm)->maxauthsize)
+	if (authsize > crypto_aead_maxauthsize(tfm))
 		return -EINVAL;
 
-	if (crypto_aead_alg(tfm)->setauthsize) {
-		err = crypto_aead_alg(tfm)->setauthsize(crt->base, authsize);
+	if (tfm->setauthsize) {
+		err = tfm->setauthsize(tfm->child, authsize);
 		if (err)
 			return err;
 	}
 
-	crypto_aead_crt(crt->base)->authsize = authsize;
-	crt->authsize = authsize;
+	tfm->child->authsize = authsize;
+	tfm->authsize = authsize;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_aead_setauthsize);
 
-static unsigned int crypto_aead_ctxsize(struct crypto_alg *alg, u32 type,
-					u32 mask)
+struct aead_old_request {
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct aead_request subreq;
+};
+
+unsigned int crypto_aead_reqsize(struct crypto_aead *tfm)
 {
-	return alg->cra_ctxsize;
+	return tfm->reqsize + sizeof(struct aead_old_request);
+}
+EXPORT_SYMBOL_GPL(crypto_aead_reqsize);
+
+static int old_crypt(struct aead_request *req,
+		     int (*crypt)(struct aead_request *req))
+{
+	struct aead_old_request *nreq = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct scatterlist *src, *dst;
+
+	if (req->old)
+		return crypt(req);
+
+	src = scatterwalk_ffwd(nreq->srcbuf, req->src, req->assoclen);
+	dst = req->src == req->dst ?
+	      src : scatterwalk_ffwd(nreq->dstbuf, req->dst, req->assoclen);
+
+	aead_request_set_tfm(&nreq->subreq, aead);
+	aead_request_set_callback(&nreq->subreq, aead_request_flags(req),
+				  req->base.complete, req->base.data);
+	aead_request_set_crypt(&nreq->subreq, src, dst, req->cryptlen,
+			       req->iv);
+	aead_request_set_assoc(&nreq->subreq, req->src, req->assoclen);
+
+	return crypt(&nreq->subreq);
+}
+
+static int old_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct old_aead_alg *alg = crypto_old_aead_alg(aead);
+
+	return old_crypt(req, alg->encrypt);
+}
+
+static int old_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct old_aead_alg *alg = crypto_old_aead_alg(aead);
+
+	return old_crypt(req, alg->decrypt);
 }
 
 static int no_givcrypt(struct aead_givcrypt_request *req)
@@ -90,32 +149,68 @@ static int no_givcrypt(struct aead_givcrypt_request *req)
 	return -ENOSYS;
 }
 
-static int crypto_init_aead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+static int crypto_old_aead_init_tfm(struct crypto_tfm *tfm)
 {
-	struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
-	struct aead_tfm *crt = &tfm->crt_aead;
+	struct old_aead_alg *alg = &tfm->__crt_alg->cra_aead;
+	struct crypto_aead *crt = __crypto_aead_cast(tfm);
 
 	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
 		return -EINVAL;
 
-	crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
-		      alg->setkey : setkey;
-	crt->encrypt = alg->encrypt;
-	crt->decrypt = alg->decrypt;
-	crt->givencrypt = alg->givencrypt ?: no_givcrypt;
-	crt->givdecrypt = alg->givdecrypt ?: no_givcrypt;
-	crt->base = __crypto_aead_cast(tfm);
-	crt->ivsize = alg->ivsize;
+	crt->setkey = alg->setkey;
+	crt->setauthsize = alg->setauthsize;
+	crt->encrypt = old_encrypt;
+	crt->decrypt = old_decrypt;
+	if (alg->ivsize) {
+		crt->givencrypt = alg->givencrypt ?: no_givcrypt;
+		crt->givdecrypt = alg->givdecrypt ?: no_givcrypt;
+	} else {
+		crt->givencrypt = aead_null_givencrypt;
+		crt->givdecrypt = aead_null_givdecrypt;
+	}
+	crt->child = __crypto_aead_cast(tfm);
 	crt->authsize = alg->maxauthsize;
 
 	return 0;
 }
 
+static void crypto_aead_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *aead = __crypto_aead_cast(tfm);
+	struct aead_alg *alg = crypto_aead_alg(aead);
+
+	alg->exit(aead);
+}
+
+static int crypto_aead_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *aead = __crypto_aead_cast(tfm);
+	struct aead_alg *alg = crypto_aead_alg(aead);
+
+	if (crypto_old_aead_alg(aead)->encrypt)
+		return crypto_old_aead_init_tfm(tfm);
+
+	aead->setkey = alg->setkey;
+	aead->setauthsize = alg->setauthsize;
+	aead->encrypt = alg->encrypt;
+	aead->decrypt = alg->decrypt;
+	aead->child = __crypto_aead_cast(tfm);
+	aead->authsize = alg->maxauthsize;
+
+	if (alg->exit)
+		aead->base.exit = crypto_aead_exit_tfm;
+
+	if (alg->init)
+		return alg->init(aead);
+
+	return 0;
+}
+
 #ifdef CONFIG_NET
-static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
+static int crypto_old_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_aead raead;
-	struct aead_alg *aead = &alg->cra_aead;
+	struct old_aead_alg *aead = &alg->cra_aead;
 
 	strncpy(raead.type, "aead", sizeof(raead.type));
 	strncpy(raead.geniv, aead->geniv ?: "<built-in>", sizeof(raead.geniv));
@@ -129,6 +224,64 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
 		goto nla_put_failure;
 	return 0;
 
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int crypto_old_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_old_aead_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+static void crypto_old_aead_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	struct old_aead_alg *aead = &alg->cra_aead;
+
+	seq_printf(m, "type         : aead\n");
+	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+					     "yes" : "no");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "ivsize       : %u\n", aead->ivsize);
+	seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
+	seq_printf(m, "geniv        : %s\n", aead->geniv ?: "<built-in>");
+}
+
+const struct crypto_type crypto_aead_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_aead_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_old_aead_show,
+#endif
+	.report = crypto_old_aead_report,
+	.lookup = crypto_lookup_aead,
+	.maskclear = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV),
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.tfmsize = offsetof(struct crypto_aead, base),
+};
+EXPORT_SYMBOL_GPL(crypto_aead_type);
+
+#ifdef CONFIG_NET
+static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_aead raead;
+	struct aead_alg *aead = container_of(alg, struct aead_alg, base);
+
+	strncpy(raead.type, "aead", sizeof(raead.type));
+	strncpy(raead.geniv, "<none>", sizeof(raead.geniv));
+
+	raead.blocksize = alg->cra_blocksize;
+	raead.maxauthsize = aead->maxauthsize;
+	raead.ivsize = aead->ivsize;
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_AEAD,
+		    sizeof(struct crypto_report_aead), &raead))
+		goto nla_put_failure;
+	return 0;
+
 nla_put_failure:
 	return -EMSGSIZE;
 }
@@ -143,7 +296,7 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 	__attribute__ ((unused));
 static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 {
-	struct aead_alg *aead = &alg->cra_aead;
+	struct aead_alg *aead = container_of(alg, struct aead_alg, base);
 
 	seq_printf(m, "type         : aead\n");
 	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
@@ -151,18 +304,21 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 	seq_printf(m, "ivsize       : %u\n", aead->ivsize);
 	seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
-	seq_printf(m, "geniv        : %s\n", aead->geniv ?: "<built-in>");
+	seq_printf(m, "geniv        : <none>\n");
 }
 
-const struct crypto_type crypto_aead_type = {
-	.ctxsize = crypto_aead_ctxsize,
-	.init = crypto_init_aead_ops,
+static const struct crypto_type crypto_new_aead_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_aead_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_aead_show,
 #endif
 	.report = crypto_aead_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.tfmsize = offsetof(struct crypto_aead, base),
 };
-EXPORT_SYMBOL_GPL(crypto_aead_type);
 
 static int aead_null_givencrypt(struct aead_givcrypt_request *req)
 {
@@ -174,33 +330,11 @@ static int aead_null_givdecrypt(struct aead_givcrypt_request *req)
 	return crypto_aead_decrypt(&req->areq);
 }
 
-static int crypto_init_nivaead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
-{
-	struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
-	struct aead_tfm *crt = &tfm->crt_aead;
-
-	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	crt->setkey = setkey;
-	crt->encrypt = alg->encrypt;
-	crt->decrypt = alg->decrypt;
-	if (!alg->ivsize) {
-		crt->givencrypt = aead_null_givencrypt;
-		crt->givdecrypt = aead_null_givdecrypt;
-	}
-	crt->base = __crypto_aead_cast(tfm);
-	crt->ivsize = alg->ivsize;
-	crt->authsize = alg->maxauthsize;
-
-	return 0;
-}
-
 #ifdef CONFIG_NET
 static int crypto_nivaead_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_aead raead;
-	struct aead_alg *aead = &alg->cra_aead;
+	struct old_aead_alg *aead = &alg->cra_aead;
 
 	strncpy(raead.type, "nivaead", sizeof(raead.type));
 	strncpy(raead.geniv, aead->geniv, sizeof(raead.geniv));
@@ -229,7 +363,7 @@ static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
 	__attribute__ ((unused));
 static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
 {
-	struct aead_alg *aead = &alg->cra_aead;
+	struct old_aead_alg *aead = &alg->cra_aead;
 
 	seq_printf(m, "type         : nivaead\n");
 	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
@@ -241,43 +375,215 @@ static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
 }
 
 const struct crypto_type crypto_nivaead_type = {
-	.ctxsize = crypto_aead_ctxsize,
-	.init = crypto_init_nivaead_ops,
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_aead_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_nivaead_show,
 #endif
 	.report = crypto_nivaead_report,
+	.maskclear = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV),
+	.maskset = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV,
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.tfmsize = offsetof(struct crypto_aead, base),
 };
 EXPORT_SYMBOL_GPL(crypto_nivaead_type);
 
 static int crypto_grab_nivaead(struct crypto_aead_spawn *spawn,
 			       const char *name, u32 type, u32 mask)
 {
-	struct crypto_alg *alg;
+	spawn->base.frontend = &crypto_nivaead_type;
+	return crypto_grab_spawn(&spawn->base, name, type, mask);
+}
+
+static int aead_geniv_setkey(struct crypto_aead *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setkey(ctx->child, key, keylen);
+}
+
+static int aead_geniv_setauthsize(struct crypto_aead *tfm,
+				  unsigned int authsize)
+{
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setauthsize(ctx->child, authsize);
+}
+
+static void compat_encrypt_complete2(struct aead_request *req, int err)
+{
+	struct compat_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_givcrypt_request *subreq = &rctx->subreq;
+	struct crypto_aead *geniv;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	if (err)
+		goto out;
+
+	geniv = crypto_aead_reqtfm(req);
+	scatterwalk_map_and_copy(subreq->giv, rctx->ivsg, 0,
+				 crypto_aead_ivsize(geniv), 1);
+
+out:
+	kzfree(subreq->giv);
+}
+
+static void compat_encrypt_complete(struct crypto_async_request *base, int err)
+{
+	struct aead_request *req = base->data;
+
+	compat_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static int compat_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct compat_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_givcrypt_request *subreq = &rctx->subreq;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+	struct scatterlist *src, *dst;
+	crypto_completion_t compl;
+	void *data;
+	u8 *info;
+	__be64 seq;
 	int err;
 
-	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	type |= CRYPTO_ALG_TYPE_AEAD;
-	mask |= CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV;
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
 
-	alg = crypto_alg_mod_lookup(name, type, mask);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	compl = req->base.complete;
+	data = req->base.data;
+
+	rctx->ivsg = scatterwalk_ffwd(rctx->ivbuf, req->dst, req->assoclen);
+	info = PageHighMem(sg_page(rctx->ivsg)) ? NULL : sg_virt(rctx->ivsg);
+
+	if (!info) {
+		info = kmalloc(ivsize, req->base.flags &
+				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+								  GFP_ATOMIC);
+		if (!info)
+			return -ENOMEM;
+
+		compl = compat_encrypt_complete;
+		data = req;
+	}
+
+	memcpy(&seq, req->iv + ivsize - sizeof(seq), sizeof(seq));
+
+	src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen + ivsize);
+	dst = req->src == req->dst ?
+	      src : scatterwalk_ffwd(rctx->dst, rctx->ivsg, ivsize);
+
+	aead_givcrypt_set_tfm(subreq, ctx->child);
+	aead_givcrypt_set_callback(subreq, req->base.flags,
+				   req->base.complete, req->base.data);
+	aead_givcrypt_set_crypt(subreq, src, dst,
+				req->cryptlen - ivsize, req->iv);
+	aead_givcrypt_set_assoc(subreq, req->src, req->assoclen);
+	aead_givcrypt_set_giv(subreq, info, be64_to_cpu(seq));
+
+	err = crypto_aead_givencrypt(subreq);
+	if (unlikely(PageHighMem(sg_page(rctx->ivsg))))
+		compat_encrypt_complete2(req, err);
+	return err;
+}
+
+static int compat_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct compat_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_request *subreq = &rctx->subreq.areq;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+	struct scatterlist *src, *dst;
+	crypto_completion_t compl;
+	void *data;
+
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen + ivsize);
+	dst = req->src == req->dst ?
+	      src : scatterwalk_ffwd(rctx->dst, req->dst,
+				     req->assoclen + ivsize);
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, src, dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_assoc(subreq, req->src, req->assoclen);
+
+	scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
+
+	return crypto_aead_decrypt(subreq);
+}
+
+static int compat_encrypt_first(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err = 0;
+
+	spin_lock_bh(&ctx->lock);
+	if (geniv->encrypt != compat_encrypt_first)
+		goto unlock;
+
+	geniv->encrypt = compat_encrypt;
+
+unlock:
+	spin_unlock_bh(&ctx->lock);
+
+	if (err)
+		return err;
+
+	return compat_encrypt(req);
+}
+
+static int aead_geniv_init_compat(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
+
+	spin_lock_init(&ctx->lock);
+
+	crypto_aead_set_reqsize(geniv, sizeof(struct compat_request_ctx));
+
+	err = aead_geniv_init(tfm);
+
+	ctx->child = geniv->child;
+	geniv->child = geniv;
 
-	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
-	crypto_mod_put(alg);
 	return err;
 }
 
-struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
-					 struct rtattr **tb, u32 type,
-					 u32 mask)
+static void aead_geniv_exit_compat(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
+
+	crypto_free_aead(ctx->child);
+}
+
+struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
+				       struct rtattr **tb, u32 type, u32 mask)
 {
 	const char *name;
 	struct crypto_aead_spawn *spawn;
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct aead_instance *inst;
+	struct aead_alg *alg;
+	unsigned int ivsize;
+	unsigned int maxauthsize;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -296,20 +602,25 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
 	if (!inst)
 		return ERR_PTR(-ENOMEM);
 
-	spawn = crypto_instance_ctx(inst);
+	spawn = aead_instance_ctx(inst);
 
 	/* Ignore async algorithms if necessary. */
 	mask |= crypto_requires_sync(algt->type, algt->mask);
 
-	crypto_set_aead_spawn(spawn, inst);
-	err = crypto_grab_nivaead(spawn, name, type, mask);
+	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
+	err = (algt->mask & CRYPTO_ALG_GENIV) ?
+	      crypto_grab_nivaead(spawn, name, type, mask) :
+	      crypto_grab_aead(spawn, name, type, mask);
 	if (err)
 		goto err_free_inst;
 
-	alg = crypto_aead_spawn_alg(spawn);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	ivsize = crypto_aead_alg_ivsize(alg);
+	maxauthsize = crypto_aead_alg_maxauthsize(alg);
 
 	err = -EINVAL;
-	if (!alg->cra_aead.ivsize)
+	if (ivsize < sizeof(u64))
 		goto err_drop_alg;
 
 	/*
@@ -318,39 +629,64 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
 	 * template name and double-check the IV generator.
 	 */
 	if (algt->mask & CRYPTO_ALG_GENIV) {
-		if (strcmp(tmpl->name, alg->cra_aead.geniv))
+		if (!alg->base.cra_aead.encrypt)
+			goto err_drop_alg;
+		if (strcmp(tmpl->name, alg->base.cra_aead.geniv))
 			goto err_drop_alg;
 
-		memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-		memcpy(inst->alg.cra_driver_name, alg->cra_driver_name,
+		memcpy(inst->alg.base.cra_name, alg->base.cra_name,
 		       CRYPTO_MAX_ALG_NAME);
-	} else {
-		err = -ENAMETOOLONG;
-		if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
-			     "%s(%s)", tmpl->name, alg->cra_name) >=
-		    CRYPTO_MAX_ALG_NAME)
-			goto err_drop_alg;
-		if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-			     "%s(%s)", tmpl->name, alg->cra_driver_name) >=
-		    CRYPTO_MAX_ALG_NAME)
-			goto err_drop_alg;
+		memcpy(inst->alg.base.cra_driver_name,
+		       alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME);
+
+		inst->alg.base.cra_flags = CRYPTO_ALG_TYPE_AEAD |
+					   CRYPTO_ALG_GENIV;
+		inst->alg.base.cra_flags |= alg->base.cra_flags &
+					    CRYPTO_ALG_ASYNC;
+		inst->alg.base.cra_priority = alg->base.cra_priority;
+		inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+		inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+		inst->alg.base.cra_type = &crypto_aead_type;
+
+		inst->alg.base.cra_aead.ivsize = ivsize;
+		inst->alg.base.cra_aead.maxauthsize = maxauthsize;
+
+		inst->alg.base.cra_aead.setkey = alg->base.cra_aead.setkey;
+		inst->alg.base.cra_aead.setauthsize =
+			alg->base.cra_aead.setauthsize;
+		inst->alg.base.cra_aead.encrypt = alg->base.cra_aead.encrypt;
+		inst->alg.base.cra_aead.decrypt = alg->base.cra_aead.decrypt;
+
+		goto out;
 	}
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV;
-	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_aead_type;
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s)", tmpl->name, alg->base.cra_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto err_drop_alg;
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s)", tmpl->name, alg->base.cra_driver_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto err_drop_alg;
+
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+	inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
+
+	inst->alg.setkey = aead_geniv_setkey;
+	inst->alg.setauthsize = aead_geniv_setauthsize;
 
-	inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
-	inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
-	inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
+	inst->alg.ivsize = ivsize;
+	inst->alg.maxauthsize = maxauthsize;
 
-	inst->alg.cra_aead.setkey = alg->cra_aead.setkey;
-	inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize;
-	inst->alg.cra_aead.encrypt = alg->cra_aead.encrypt;
-	inst->alg.cra_aead.decrypt = alg->cra_aead.decrypt;
+	inst->alg.encrypt = compat_encrypt_first;
+	inst->alg.decrypt = compat_decrypt;
+
+	inst->alg.base.cra_init = aead_geniv_init_compat;
+	inst->alg.base.cra_exit = aead_geniv_exit_compat;
 
 out:
 	return inst;
@@ -364,9 +700,9 @@ err_free_inst:
 }
 EXPORT_SYMBOL_GPL(aead_geniv_alloc);
 
-void aead_geniv_free(struct crypto_instance *inst)
+void aead_geniv_free(struct aead_instance *inst)
 {
-	crypto_drop_aead(crypto_instance_ctx(inst));
+	crypto_drop_aead(aead_instance_ctx(inst));
 	kfree(inst);
 }
 EXPORT_SYMBOL_GPL(aead_geniv_free);
@@ -374,14 +710,17 @@ EXPORT_SYMBOL_GPL(aead_geniv_free);
 int aead_geniv_init(struct crypto_tfm *tfm)
 {
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_aead *child;
 	struct crypto_aead *aead;
 
-	aead = crypto_spawn_aead(crypto_instance_ctx(inst));
-	if (IS_ERR(aead))
-		return PTR_ERR(aead);
+	aead = __crypto_aead_cast(tfm);
+
+	child = crypto_spawn_aead(crypto_instance_ctx(inst));
+	if (IS_ERR(child))
+		return PTR_ERR(child);
 
-	tfm->crt_aead.base = aead;
-	tfm->crt_aead.reqsize += crypto_aead_reqsize(aead);
+	aead->child = child;
+	aead->reqsize += crypto_aead_reqsize(child);
 
 	return 0;
 }
@@ -389,7 +728,7 @@ EXPORT_SYMBOL_GPL(aead_geniv_init);
 
 void aead_geniv_exit(struct crypto_tfm *tfm)
 {
-	crypto_free_aead(tfm->crt_aead.base);
+	crypto_free_aead(__crypto_aead_cast(tfm)->child);
 }
 EXPORT_SYMBOL_GPL(aead_geniv_exit);
 
@@ -443,6 +782,13 @@ static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask)
 	if (!tmpl)
 		goto kill_larval;
 
+	if (tmpl->create) {
+		err = tmpl->create(tmpl, tb);
+		if (err)
+			goto put_tmpl;
+		goto ok;
+	}
+
 	inst = tmpl->alloc(tb);
 	err = PTR_ERR(inst);
 	if (IS_ERR(inst))
@@ -454,6 +800,7 @@ static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask)
 		goto put_tmpl;
 	}
 
+ok:
 	/* Redo the lookup to use the instance we just registered. */
 	err = -EAGAIN;
 
@@ -489,7 +836,7 @@ struct crypto_alg *crypto_lookup_aead(const char *name, u32 type, u32 mask)
 		return alg;
 
 	if (alg->cra_type == &crypto_aead_type) {
-		if ((alg->cra_flags ^ type ^ ~mask) & CRYPTO_ALG_TESTED) {
+		if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) {
 			crypto_mod_put(alg);
 			alg = ERR_PTR(-ENOENT);
 		}
@@ -505,62 +852,91 @@ EXPORT_SYMBOL_GPL(crypto_lookup_aead);
 int crypto_grab_aead(struct crypto_aead_spawn *spawn, const char *name,
 		     u32 type, u32 mask)
 {
-	struct crypto_alg *alg;
-	int err;
+	spawn->base.frontend = &crypto_aead_type;
+	return crypto_grab_spawn(&spawn->base, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_aead);
 
-	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	type |= CRYPTO_ALG_TYPE_AEAD;
-	mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	mask |= CRYPTO_ALG_TYPE_MASK;
+struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_aead_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_aead);
 
-	alg = crypto_lookup_aead(name, type, mask);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+static int aead_prepare_alg(struct aead_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
 
-	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
-	crypto_mod_put(alg);
-	return err;
+	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	base->cra_type = &crypto_new_aead_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_AEAD;
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(crypto_grab_aead);
 
-struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
+int crypto_register_aead(struct aead_alg *alg)
 {
-	struct crypto_tfm *tfm;
+	struct crypto_alg *base = &alg->base;
 	int err;
 
-	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	type |= CRYPTO_ALG_TYPE_AEAD;
-	mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	mask |= CRYPTO_ALG_TYPE_MASK;
+	err = aead_prepare_alg(alg);
+	if (err)
+		return err;
 
-	for (;;) {
-		struct crypto_alg *alg;
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_aead);
 
-		alg = crypto_lookup_aead(alg_name, type, mask);
-		if (IS_ERR(alg)) {
-			err = PTR_ERR(alg);
-			goto err;
-		}
+void crypto_unregister_aead(struct aead_alg *alg)
+{
+	crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_aead);
 
-		tfm = __crypto_alloc_tfm(alg, type, mask);
-		if (!IS_ERR(tfm))
-			return __crypto_aead_cast(tfm);
+int crypto_register_aeads(struct aead_alg *algs, int count)
+{
+	int i, ret;
 
-		crypto_mod_put(alg);
-		err = PTR_ERR(tfm);
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_aead(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
 
 err:
-		if (err != -EAGAIN)
-			break;
-		if (signal_pending(current)) {
-			err = -EINTR;
-			break;
-		}
-	}
+	for (--i; i >= 0; --i)
+		crypto_unregister_aead(&algs[i]);
 
-	return ERR_PTR(err);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(crypto_alloc_aead);
+EXPORT_SYMBOL_GPL(crypto_register_aeads);
+
+void crypto_unregister_aeads(struct aead_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_aead(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_aeads);
+
+int aead_register_instance(struct crypto_template *tmpl,
+			   struct aead_instance *inst)
+{
+	int err;
+
+	err = aead_prepare_alg(&inst->alg);
+	if (err)
+		return err;
+
+	return crypto_register_instance(tmpl, aead_crypto_instance(inst));
+}
+EXPORT_SYMBOL_GPL(aead_register_instance);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Authenticated Encryption with Associated Data (AEAD)");

+ 4 - 1
crypto/af_alg.c

@@ -127,6 +127,7 @@ EXPORT_SYMBOL_GPL(af_alg_release);
 
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
+	const u32 forbidden = CRYPTO_ALG_INTERNAL;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct sockaddr_alg *sa = (void *)uaddr;
@@ -151,7 +152,9 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
-	private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
+	private = type->bind(sa->salg_name,
+			     sa->salg_feat & ~forbidden,
+			     sa->salg_mask & ~forbidden);
 	if (IS_ERR(private)) {
 		module_put(type->owner);
 		return PTR_ERR(private);

+ 117 - 0
crypto/akcipher.c

@@ -0,0 +1,117 @@
+/*
+ * Public Key Encryption
+ *
+ * Copyright (c) 2015, Intel Corporation
+ * Authors: Tadeusz Struk <tadeusz.struk@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <crypto/akcipher.h>
+#include <crypto/public_key.h>
+#include "internal.h"
+
+#ifdef CONFIG_NET
+static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_akcipher rakcipher;
+
+	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
+		    sizeof(struct crypto_report_akcipher), &rakcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
+static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+
+static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	seq_puts(m, "type         : akcipher\n");
+}
+
+static void crypto_akcipher_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_akcipher *akcipher = __crypto_akcipher_tfm(tfm);
+	struct akcipher_alg *alg = crypto_akcipher_alg(akcipher);
+
+	alg->exit(akcipher);
+}
+
+static int crypto_akcipher_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_akcipher *akcipher = __crypto_akcipher_tfm(tfm);
+	struct akcipher_alg *alg = crypto_akcipher_alg(akcipher);
+
+	if (alg->exit)
+		akcipher->base.exit = crypto_akcipher_exit_tfm;
+
+	if (alg->init)
+		return alg->init(akcipher);
+
+	return 0;
+}
+
+static const struct crypto_type crypto_akcipher_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_akcipher_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_akcipher_show,
+#endif
+	.report = crypto_akcipher_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_AKCIPHER,
+	.tfmsize = offsetof(struct crypto_akcipher, base),
+};
+
+struct crypto_akcipher *crypto_alloc_akcipher(const char *alg_name, u32 type,
+					      u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_akcipher_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_akcipher);
+
+int crypto_register_akcipher(struct akcipher_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+
+	base->cra_type = &crypto_akcipher_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_AKCIPHER;
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_akcipher);
+
+void crypto_unregister_akcipher(struct akcipher_alg *alg)
+{
+	crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_akcipher);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic public key cihper type");

+ 26 - 5
crypto/algapi.c

@@ -12,6 +12,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -43,12 +44,9 @@ static inline int crypto_set_driver_name(struct crypto_alg *alg)
 
 static inline void crypto_check_module_sig(struct module *mod)
 {
-#ifdef CONFIG_CRYPTO_FIPS
-	if (fips_enabled && mod && !mod->sig_ok)
+	if (fips_enabled && mod && !module_sig_ok(mod))
 		panic("Module %s signature verification failed in FIPS mode\n",
-		      mod->name);
-#endif
-	return;
+		      module_name(mod));
 }
 
 static int crypto_check_alg(struct crypto_alg *alg)
@@ -614,6 +612,22 @@ out:
 }
 EXPORT_SYMBOL_GPL(crypto_init_spawn2);
 
+int crypto_grab_spawn(struct crypto_spawn *spawn, const char *name,
+		      u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+	int err;
+
+	alg = crypto_find_alg(name, spawn->frontend, type, mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	err = crypto_init_spawn(spawn, alg, spawn->inst, mask);
+	crypto_mod_put(alg);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_grab_spawn);
+
 void crypto_drop_spawn(struct crypto_spawn *spawn)
 {
 	if (!spawn->alg)
@@ -964,6 +978,13 @@ void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_xor);
 
+unsigned int crypto_alg_extsize(struct crypto_alg *alg)
+{
+	return alg->cra_ctxsize +
+	       (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
+}
+EXPORT_SYMBOL_GPL(crypto_alg_extsize);
+
 static int __init crypto_algapi_init(void)
 {
 	crypto_init_proc();
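
A worked example of crypto_alg_extsize() with hypothetical numbers:
given cra_ctxsize = 24, cra_alignmask = 15 and an 8-byte
crypto_tfm_ctx_alignment(), the result is

	24 + (15 & ~(8 - 1)) = 24 + 8 = 32 bytes

i.e. just enough slack to realign an 8-byte-aligned context pointer up
to the algorithm's stricter 16-byte boundary.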

+ 11 - 59
crypto/algif_aead.c

@@ -13,6 +13,7 @@
  * any later version.
  */
 
+#include <crypto/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/if_alg.h>
 #include <linux/init.h>
@@ -71,7 +72,7 @@ static inline bool aead_sufficient_data(struct aead_ctx *ctx)
 {
 	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
 
-	return (ctx->used >= (ctx->aead_assoclen + (ctx->enc ? 0 : as)));
+	return ctx->used >= ctx->aead_assoclen + as;
 }
 
 static void aead_put_sgl(struct sock *sk)
@@ -352,12 +353,8 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_ctx *ctx = ask->private;
-	unsigned bs = crypto_aead_blocksize(crypto_aead_reqtfm(&ctx->aead_req));
 	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
 	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct scatterlist *sg = NULL;
-	struct scatterlist assoc[ALG_MAX_PAGES];
-	size_t assoclen = 0;
 	unsigned int i = 0;
 	int err = -EINVAL;
 	unsigned long used = 0;
@@ -406,23 +403,13 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
 	if (!aead_sufficient_data(ctx))
 		goto unlock;
 
+	outlen = used;
+
 	/*
 	 * The cipher operation input data is reduced by the associated data
 	 * length and, for encryption, the tag size, as both are handled
 	 * separately later on.
 	 */
-	used -= ctx->aead_assoclen;
-
-	if (ctx->enc) {
-		/* round up output buffer to multiple of block size */
-		outlen = ((used + bs - 1) / bs * bs);
-		/* add the size needed for the auth tag to be created */
-		outlen += as;
-	} else {
-		/* output data size is input without the authentication tag */
-		outlen = used - as;
-		/* round up output buffer to multiple of block size */
-		outlen = ((outlen + bs - 1) / bs * bs);
-	}
+	used -= ctx->aead_assoclen + (ctx->enc ? as : 0);
 
 	/* convert iovecs of output buffers into scatterlists */
 	while (iov_iter_count(&msg->msg_iter)) {
@@ -451,47 +438,11 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
 	if (usedpages < outlen)
 		goto unlock;
 
-	sg_init_table(assoc, ALG_MAX_PAGES);
-	assoclen = ctx->aead_assoclen;
-	/*
-	 * Split scatterlist into two: first part becomes AD, second part
-	 * is plaintext / ciphertext. The first part is assigned to assoc
-	 * scatterlist. When this loop finishes, sg points to the start of the
-	 * plaintext / ciphertext.
-	 */
-	for (i = 0; i < ctx->tsgl.cur; i++) {
-		sg = sgl->sg + i;
-		if (sg->length <= assoclen) {
-			/* AD is larger than one page */
-			sg_set_page(assoc + i, sg_page(sg),
-				    sg->length, sg->offset);
-			assoclen -= sg->length;
-			if (i >= ctx->tsgl.cur)
-				goto unlock;
-		} else if (!assoclen) {
-			/* current page is to start of plaintext / ciphertext */
-			if (i)
-				/* AD terminates at page boundary */
-				sg_mark_end(assoc + i - 1);
-			else
-				/* AD size is zero */
-				sg_mark_end(assoc);
-			break;
-		} else {
-			/* AD does not terminate at page boundary */
-			sg_set_page(assoc + i, sg_page(sg),
-				    assoclen, sg->offset);
-			sg_mark_end(assoc + i);
-			/* plaintext / ciphertext starts after AD */
-			sg->length -= assoclen;
-			sg->offset += assoclen;
-			break;
-		}
-	}
+	sg_mark_end(sgl->sg + sgl->cur - 1);
 
-	aead_request_set_assoc(&ctx->aead_req, assoc, ctx->aead_assoclen);
-	aead_request_set_crypt(&ctx->aead_req, sg, ctx->rsgl[0].sg, used,
-			       ctx->iv);
+	aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->rsgl[0].sg,
+			       used, ctx->iv);
+	aead_request_set_ad(&ctx->aead_req, ctx->aead_assoclen);
 
 	err = af_alg_wait_for_completion(ctx->enc ?
 					 crypto_aead_encrypt(&ctx->aead_req) :
@@ -563,7 +514,8 @@ static struct proto_ops algif_aead_ops = {
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
 {
-	return crypto_alloc_aead(name, type, mask);
+	return crypto_alloc_aead(name, type | CRYPTO_ALG_AEAD_NEW,
+				 mask | CRYPTO_ALG_AEAD_NEW);
 }
 
 static void aead_release(void *private)
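
For orientation, a userspace sketch of the path that reaches
aead_bind() (the algorithm name is illustrative; a real program needs
<sys/socket.h> and <linux/if_alg.h>); the CRYPTO_ALG_AEAD_NEW bits are
ORed in on the kernel side:

	int sk = socket(AF_ALG, SOCK_SEQPACKET, 0);
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",
		.salg_name   = "gcm(aes)",
	};

	bind(sk, (struct sockaddr *)&sa, sizeof(sa));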

+ 1 - 1
crypto/algif_rng.c

@@ -164,7 +164,7 @@ static int rng_setkey(void *private, const u8 *seed, unsigned int seedlen)
 	 * Checking whether seedlen is of sufficient size is done in the
 	 * RNG implementations.
 	 */
-	return crypto_rng_reset(private, (u8 *)seed, seedlen);
+	return crypto_rng_reset(private, seed, seedlen);
 }
 
 static const struct af_alg_type algif_type_rng = {

+ 41 - 47
crypto/ansi_cprng.c

@@ -20,8 +20,6 @@
 #include <linux/moduleparam.h>
 #include <linux/string.h>
 
-#include "internal.h"
-
 #define DEFAULT_PRNG_KEY "0123456789abcdef"
 #define DEFAULT_PRNG_KSZ 16
 #define DEFAULT_BLK_SZ 16
@@ -281,11 +279,11 @@ static void free_prng_context(struct prng_context *ctx)
 }
 
 static int reset_prng_context(struct prng_context *ctx,
-			      unsigned char *key, size_t klen,
-			      unsigned char *V, unsigned char *DT)
+			      const unsigned char *key, size_t klen,
+			      const unsigned char *V, const unsigned char *DT)
 {
 	int ret;
-	unsigned char *prng_key;
+	const unsigned char *prng_key;
 
 	spin_lock_bh(&ctx->prng_lock);
 	ctx->flags |= PRNG_NEED_RESET;
@@ -353,8 +351,9 @@ static void cprng_exit(struct crypto_tfm *tfm)
 	free_prng_context(crypto_tfm_ctx(tfm));
 }
 
-static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
-			    unsigned int dlen)
+static int cprng_get_random(struct crypto_rng *tfm,
+			    const u8 *src, unsigned int slen,
+			    u8 *rdata, unsigned int dlen)
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
 
@@ -367,11 +366,12 @@ static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
  *  V and KEY are required during reset, and DT is optional, detected
  *  as being present by testing the length of the seed
  */
-static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int cprng_reset(struct crypto_rng *tfm,
+		       const u8 *seed, unsigned int slen)
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
-	u8 *key = seed + DEFAULT_BLK_SZ;
-	u8 *dt = NULL;
+	const u8 *key = seed + DEFAULT_BLK_SZ;
+	const u8 *dt = NULL;
 
 	if (slen < DEFAULT_PRNG_KSZ + DEFAULT_BLK_SZ)
 		return -EINVAL;
@@ -387,18 +387,20 @@ static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
 }
 
 #ifdef CONFIG_CRYPTO_FIPS
-static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
-			    unsigned int dlen)
+static int fips_cprng_get_random(struct crypto_rng *tfm,
+				 const u8 *src, unsigned int slen,
+				 u8 *rdata, unsigned int dlen)
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
 
 	return get_prng_bytes(rdata, dlen, prng, 1);
 }
 
-static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int fips_cprng_reset(struct crypto_rng *tfm,
+			    const u8 *seed, unsigned int slen)
 {
 	u8 rdata[DEFAULT_BLK_SZ];
-	u8 *key = seed + DEFAULT_BLK_SZ;
+	const u8 *key = seed + DEFAULT_BLK_SZ;
 	int rc;
 
 	struct prng_context *prng = crypto_rng_ctx(tfm);
@@ -424,40 +426,32 @@ out:
 }
 #endif
 
-static struct crypto_alg rng_algs[] = { {
-	.cra_name		= "stdrng",
-	.cra_driver_name	= "ansi_cprng",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= sizeof(struct prng_context),
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= cprng_init,
-	.cra_exit		= cprng_exit,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= cprng_get_random,
-			.rng_reset		= cprng_reset,
-			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
-		}
+static struct rng_alg rng_algs[] = { {
+	.generate		= cprng_get_random,
+	.seed			= cprng_reset,
+	.seedsize		= DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ,
+	.base			=	{
+		.cra_name		= "stdrng",
+		.cra_driver_name	= "ansi_cprng",
+		.cra_priority		= 100,
+		.cra_ctxsize		= sizeof(struct prng_context),
+		.cra_module		= THIS_MODULE,
+		.cra_init		= cprng_init,
+		.cra_exit		= cprng_exit,
 	}
 #ifdef CONFIG_CRYPTO_FIPS
 }, {
-	.cra_name		= "fips(ansi_cprng)",
-	.cra_driver_name	= "fips_ansi_cprng",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= sizeof(struct prng_context),
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= cprng_init,
-	.cra_exit		= cprng_exit,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= fips_cprng_get_random,
-			.rng_reset		= fips_cprng_reset,
-			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
-		}
+	.generate		= fips_cprng_get_random,
+	.seed			= fips_cprng_reset,
+	.seedsize		= DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ,
+	.base			=	{
+		.cra_name		= "fips(ansi_cprng)",
+		.cra_driver_name	= "fips_ansi_cprng",
+		.cra_priority		= 300,
+		.cra_ctxsize		= sizeof(struct prng_context),
+		.cra_module		= THIS_MODULE,
+		.cra_init		= cprng_init,
+		.cra_exit		= cprng_exit,
 	}
 #endif
 } };
@@ -465,12 +459,12 @@ static struct crypto_alg rng_algs[] = { {
 /* Module initialization */
 static int __init prng_mod_init(void)
 {
-	return crypto_register_algs(rng_algs, ARRAY_SIZE(rng_algs));
+	return crypto_register_rngs(rng_algs, ARRAY_SIZE(rng_algs));
 }
 
 static void __exit prng_mod_fini(void)
 {
-	crypto_unregister_algs(rng_algs, ARRAY_SIZE(rng_algs));
+	crypto_unregister_rngs(rng_algs, ARRAY_SIZE(rng_algs));
 }
 
 MODULE_LICENSE("GPL");

+ 9 - 8
crypto/authenc.c

@@ -10,7 +10,7 @@
  *
  */
 
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/authenc.h>
@@ -570,13 +570,14 @@ static int crypto_authenc_init_tfm(struct crypto_tfm *tfm)
 			    crypto_ahash_alignmask(auth) + 1) +
 		      crypto_ablkcipher_ivsize(enc);
 
-	tfm->crt_aead.reqsize = sizeof(struct authenc_request_ctx) +
-				ctx->reqoff +
-				max_t(unsigned int,
-				crypto_ahash_reqsize(auth) +
-				sizeof(struct ahash_request),
-				sizeof(struct skcipher_givcrypt_request) +
-				crypto_ablkcipher_reqsize(enc));
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct authenc_request_ctx) +
+		ctx->reqoff +
+		max_t(unsigned int,
+			crypto_ahash_reqsize(auth) +
+			sizeof(struct ahash_request),
+			sizeof(struct skcipher_givcrypt_request) +
+			crypto_ablkcipher_reqsize(enc)));
 
 	return 0;
 

+ 9 - 8
crypto/authencesn.c

@@ -12,7 +12,7 @@
  *
  */
 
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/authenc.h>
@@ -662,13 +662,14 @@ static int crypto_authenc_esn_init_tfm(struct crypto_tfm *tfm)
 			    crypto_ahash_alignmask(auth) + 1) +
 		      crypto_ablkcipher_ivsize(enc);
 
-	tfm->crt_aead.reqsize = sizeof(struct authenc_esn_request_ctx) +
-				ctx->reqoff +
-				max_t(unsigned int,
-				crypto_ahash_reqsize(auth) +
-				sizeof(struct ahash_request),
-				sizeof(struct skcipher_givcrypt_request) +
-				crypto_ablkcipher_reqsize(enc));
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct authenc_esn_request_ctx) +
+		ctx->reqoff +
+		max_t(unsigned int,
+			crypto_ahash_reqsize(auth) +
+			sizeof(struct ahash_request),
+			sizeof(struct skcipher_givcrypt_request) +
+			crypto_ablkcipher_reqsize(enc)));
 
 	return 0;
 

+ 1 - 0
crypto/blkcipher.c

@@ -14,6 +14,7 @@
  *
  */
 
+#include <crypto/aead.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/errno.h>

+ 7 - 7
crypto/ccm.c

@@ -453,9 +453,9 @@ static int crypto_ccm_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_tfm_alg_alignmask(tfm);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = align +
-				sizeof(struct crypto_ccm_req_priv_ctx) +
-				crypto_ablkcipher_reqsize(ctr);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		align + sizeof(struct crypto_ccm_req_priv_ctx) +
+		crypto_ablkcipher_reqsize(ctr));
 
 	return 0;
 
@@ -729,10 +729,10 @@ static int crypto_rfc4309_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_aead_alignmask(aead);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = sizeof(struct aead_request) +
-				ALIGN(crypto_aead_reqsize(aead),
-				      crypto_tfm_ctx_alignment()) +
-				align + 16;
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct aead_request) +
+		ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
+		align + 16);
 
 	return 0;
 }

+ 216 - 0
crypto/chacha20_generic.c

@@ -0,0 +1,216 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#define CHACHA20_NONCE_SIZE 16
+#define CHACHA20_KEY_SIZE   32
+#define CHACHA20_BLOCK_SIZE 64
+
+struct chacha20_ctx {
+	u32 key[8];
+};
+
+static inline u32 rotl32(u32 v, u8 n)
+{
+	return (v << n) | (v >> (sizeof(v) * 8 - n));
+}
+
+static inline u32 le32_to_cpuvp(const void *p)
+{
+	return le32_to_cpup(p);
+}
+
+static void chacha20_block(u32 *state, void *stream)
+{
+	u32 x[16], *out = stream;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		x[i] = state[i];
+
+	for (i = 0; i < 20; i += 2) {
+		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
+		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
+		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
+		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
+
+		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
+		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
+		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
+		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
+
+		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
+		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
+		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
+		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
+
+		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
+		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
+		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
+		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
+
+		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
+		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
+		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
+		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
+
+		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
+		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
+		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
+		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
+
+		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
+		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
+		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
+		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
+
+		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
+		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
+		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
+		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		out[i] = cpu_to_le32(x[i] + state[i]);
+
+	state[12]++;
+}
+
+static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
+			     unsigned int bytes)
+{
+	u8 stream[CHACHA20_BLOCK_SIZE];
+
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= CHACHA20_BLOCK_SIZE) {
+		chacha20_block(state, stream);
+		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
+		bytes -= CHACHA20_BLOCK_SIZE;
+		dst += CHACHA20_BLOCK_SIZE;
+	}
+	if (bytes) {
+		chacha20_block(state, stream);
+		crypto_xor(dst, stream, bytes);
+	}
+}
+
+static void chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
+{
+	static const char constant[16] = "expand 32-byte k";
+
+	state[0]  = le32_to_cpuvp(constant +  0);
+	state[1]  = le32_to_cpuvp(constant +  4);
+	state[2]  = le32_to_cpuvp(constant +  8);
+	state[3]  = le32_to_cpuvp(constant + 12);
+	state[4]  = ctx->key[0];
+	state[5]  = ctx->key[1];
+	state[6]  = ctx->key[2];
+	state[7]  = ctx->key[3];
+	state[8]  = ctx->key[4];
+	state[9]  = ctx->key[5];
+	state[10] = ctx->key[6];
+	state[11] = ctx->key[7];
+	state[12] = le32_to_cpuvp(iv +  0);
+	state[13] = le32_to_cpuvp(iv +  4);
+	state[14] = le32_to_cpuvp(iv +  8);
+	state[15] = le32_to_cpuvp(iv + 12);
+}
+
+static int chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
+	int i;
+
+	if (keysize != CHACHA20_KEY_SIZE)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+		ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32));
+
+	return 0;
+}
+
+static int chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	u32 state[16];
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+
+	chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+
+	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+				 rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+		err = blkcipher_walk_done(desc, &walk,
+					  walk.nbytes % CHACHA20_BLOCK_SIZE);
+	}
+
+	if (walk.nbytes) {
+		chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+				 walk.nbytes);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "chacha20",
+	.cra_driver_name	= "chacha20-generic",
+	.cra_priority		= 100,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_ctxsize		= sizeof(struct chacha20_ctx),
+	.cra_alignmask		= sizeof(u32) - 1,
+	.cra_module		= THIS_MODULE,
+	.cra_u			= {
+		.blkcipher = {
+			.min_keysize	= CHACHA20_KEY_SIZE,
+			.max_keysize	= CHACHA20_KEY_SIZE,
+			.ivsize		= CHACHA20_NONCE_SIZE,
+			.geniv		= "seqiv",
+			.setkey		= chacha20_setkey,
+			.encrypt	= chacha20_crypt,
+			.decrypt	= chacha20_crypt,
+		},
+	},
+};
+
+static int __init chacha20_generic_mod_init(void)
+{
+	return crypto_register_alg(&alg);
+}
+
+static void __exit chacha20_generic_mod_fini(void)
+{
+	crypto_unregister_alg(&alg);
+}
+
+module_init(chacha20_generic_mod_init);
+module_exit(chacha20_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("chacha20 cipher algorithm");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-generic");

+ 695 - 0
crypto/chacha20poly1305.c

@@ -0,0 +1,695 @@
+/*
+ * ChaCha20-Poly1305 AEAD, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+#define POLY1305_BLOCK_SIZE	16
+#define POLY1305_DIGEST_SIZE	16
+#define POLY1305_KEY_SIZE	32
+#define CHACHA20_KEY_SIZE	32
+#define CHACHA20_IV_SIZE	16
+#define CHACHAPOLY_IV_SIZE	12
+
+struct chachapoly_instance_ctx {
+	struct crypto_skcipher_spawn chacha;
+	struct crypto_ahash_spawn poly;
+	unsigned int saltlen;
+};
+
+struct chachapoly_ctx {
+	struct crypto_ablkcipher *chacha;
+	struct crypto_ahash *poly;
+	/* key bytes we use for the ChaCha20 IV */
+	unsigned int saltlen;
+	u8 salt[];
+};
+
+struct poly_req {
+	/* zero byte padding for AD/ciphertext, as needed */
+	u8 pad[POLY1305_BLOCK_SIZE];
+	/* tail data with AD/ciphertext lengths */
+	struct {
+		__le64 assoclen;
+		__le64 cryptlen;
+	} tail;
+	struct scatterlist src[1];
+	struct ahash_request req; /* must be last member */
+};
+
+struct chacha_req {
+	u8 iv[CHACHA20_IV_SIZE];
+	struct scatterlist src[1];
+	struct ablkcipher_request req; /* must be last member */
+};
+
+struct chachapoly_req_ctx {
+	/* the key we generate for Poly1305 using ChaCha20 */
+	u8 key[POLY1305_KEY_SIZE];
+	/* calculated Poly1305 tag */
+	u8 tag[POLY1305_DIGEST_SIZE];
+	/* length of data to en/decrypt, without ICV */
+	unsigned int cryptlen;
+	union {
+		struct poly_req poly;
+		struct chacha_req chacha;
+	} u;
+};
+
+static inline void async_done_continue(struct aead_request *req, int err,
+				       int (*cont)(struct aead_request *))
+{
+	if (!err)
+		err = cont(req);
+
+	if (err != -EINPROGRESS && err != -EBUSY)
+		aead_request_complete(req, err);
+}
+
+static void chacha_iv(u8 *iv, struct aead_request *req, u32 icb)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	__le32 leicb = cpu_to_le32(icb);
+
+	memcpy(iv, &leicb, sizeof(leicb));
+	memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen);
+	memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv,
+	       CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen);
+}
+
+static int poly_verify_tag(struct aead_request *req)
+{
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	u8 tag[sizeof(rctx->tag)];
+
+	scatterwalk_map_and_copy(tag, req->src, rctx->cryptlen, sizeof(tag), 0);
+	if (crypto_memneq(tag, rctx->tag, sizeof(tag)))
+		return -EBADMSG;
+	return 0;
+}
+
+static int poly_copy_tag(struct aead_request *req)
+{
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+
+	scatterwalk_map_and_copy(rctx->tag, req->dst, rctx->cryptlen,
+				 sizeof(rctx->tag), 1);
+	return 0;
+}
+
+static void chacha_decrypt_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_verify_tag);
+}
+
+static int chacha_decrypt(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct chacha_req *creq = &rctx->u.chacha;
+	int err;
+
+	chacha_iv(creq->iv, req, 1);
+
+	ablkcipher_request_set_callback(&creq->req, aead_request_flags(req),
+					chacha_decrypt_done, req);
+	ablkcipher_request_set_tfm(&creq->req, ctx->chacha);
+	ablkcipher_request_set_crypt(&creq->req, req->src, req->dst,
+				     rctx->cryptlen, creq->iv);
+	err = crypto_ablkcipher_decrypt(&creq->req);
+	if (err)
+		return err;
+
+	return poly_verify_tag(req);
+}
+
+static int poly_tail_continue(struct aead_request *req)
+{
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+
+	if (rctx->cryptlen == req->cryptlen) /* encrypting */
+		return poly_copy_tag(req);
+
+	return chacha_decrypt(req);
+}
+
+static void poly_tail_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_tail_continue);
+}
+
+static int poly_tail(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	__le64 len;
+	int err;
+
+	sg_init_table(preq->src, 1);
+	len = cpu_to_le64(req->assoclen);
+	memcpy(&preq->tail.assoclen, &len, sizeof(len));
+	len = cpu_to_le64(rctx->cryptlen);
+	memcpy(&preq->tail.cryptlen, &len, sizeof(len));
+	sg_set_buf(preq->src, &preq->tail, sizeof(preq->tail));
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_tail_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+	ahash_request_set_crypt(&preq->req, preq->src,
+				rctx->tag, sizeof(preq->tail));
+
+	err = crypto_ahash_finup(&preq->req);
+	if (err)
+		return err;
+
+	return poly_tail_continue(req);
+}
+
+static void poly_cipherpad_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_tail);
+}
+
+static int poly_cipherpad(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	unsigned int padlen, bs = POLY1305_BLOCK_SIZE;
+	int err;
+
+	padlen = (bs - (rctx->cryptlen % bs)) % bs;
+	memset(preq->pad, 0, sizeof(preq->pad));
+	sg_init_table(preq->src, 1);
+	sg_set_buf(preq->src, &preq->pad, padlen);
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_cipherpad_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+	ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen);
+
+	err = crypto_ahash_update(&preq->req);
+	if (err)
+		return err;
+
+	return poly_tail(req);
+}
+
+static void poly_cipher_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_cipherpad);
+}
+
+static int poly_cipher(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	struct scatterlist *crypt = req->src;
+	int err;
+
+	if (rctx->cryptlen == req->cryptlen) /* encrypting */
+		crypt = req->dst;
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_cipher_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+	ahash_request_set_crypt(&preq->req, crypt, NULL, rctx->cryptlen);
+
+	err = crypto_ahash_update(&preq->req);
+	if (err)
+		return err;
+
+	return poly_cipherpad(req);
+}
+
+static void poly_adpad_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_cipher);
+}
+
+static int poly_adpad(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	unsigned int padlen, bs = POLY1305_BLOCK_SIZE;
+	int err;
+
+	padlen = (bs - (req->assoclen % bs)) % bs;
+	memset(preq->pad, 0, sizeof(preq->pad));
+	sg_init_table(preq->src, 1);
+	sg_set_buf(preq->src, preq->pad, padlen);
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_adpad_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+	ahash_request_set_crypt(&preq->req, preq->src, NULL, padlen);
+
+	err = crypto_ahash_update(&preq->req);
+	if (err)
+		return err;
+
+	return poly_cipher(req);
+}
+
+static void poly_ad_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_adpad);
+}
+
+static int poly_ad(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	int err;
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_ad_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+	ahash_request_set_crypt(&preq->req, req->assoc, NULL, req->assoclen);
+
+	err = crypto_ahash_update(&preq->req);
+	if (err)
+		return err;
+
+	return poly_adpad(req);
+}
+
+static void poly_setkey_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_ad);
+}
+
+static int poly_setkey(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	int err;
+
+	sg_init_table(preq->src, 1);
+	sg_set_buf(preq->src, rctx->key, sizeof(rctx->key));
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_setkey_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+	ahash_request_set_crypt(&preq->req, preq->src, NULL, sizeof(rctx->key));
+
+	err = crypto_ahash_update(&preq->req);
+	if (err)
+		return err;
+
+	return poly_ad(req);
+}
+
+static void poly_init_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_setkey);
+}
+
+static int poly_init(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct poly_req *preq = &rctx->u.poly;
+	int err;
+
+	ahash_request_set_callback(&preq->req, aead_request_flags(req),
+				   poly_init_done, req);
+	ahash_request_set_tfm(&preq->req, ctx->poly);
+
+	err = crypto_ahash_init(&preq->req);
+	if (err)
+		return err;
+
+	return poly_setkey(req);
+}
+
+static void poly_genkey_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_init);
+}
+
+static int poly_genkey(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct chacha_req *creq = &rctx->u.chacha;
+	int err;
+
+	sg_init_table(creq->src, 1);
+	memset(rctx->key, 0, sizeof(rctx->key));
+	sg_set_buf(creq->src, rctx->key, sizeof(rctx->key));
+
+	chacha_iv(creq->iv, req, 0);
+
+	ablkcipher_request_set_callback(&creq->req, aead_request_flags(req),
+					poly_genkey_done, req);
+	ablkcipher_request_set_tfm(&creq->req, ctx->chacha);
+	ablkcipher_request_set_crypt(&creq->req, creq->src, creq->src,
+				     POLY1305_KEY_SIZE, creq->iv);
+
+	err = crypto_ablkcipher_decrypt(&creq->req);
+	if (err)
+		return err;
+
+	return poly_init(req);
+}
+
+static void chacha_encrypt_done(struct crypto_async_request *areq, int err)
+{
+	async_done_continue(areq->data, err, poly_genkey);
+}
+
+static int chacha_encrypt(struct aead_request *req)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+	struct chacha_req *creq = &rctx->u.chacha;
+	int err;
+
+	chacha_iv(creq->iv, req, 1);
+
+	ablkcipher_request_set_callback(&creq->req, aead_request_flags(req),
+					chacha_encrypt_done, req);
+	ablkcipher_request_set_tfm(&creq->req, ctx->chacha);
+	ablkcipher_request_set_crypt(&creq->req, req->src, req->dst,
+				     req->cryptlen, creq->iv);
+	err = crypto_ablkcipher_encrypt(&creq->req);
+	if (err)
+		return err;
+
+	return poly_genkey(req);
+}
+
+static int chachapoly_encrypt(struct aead_request *req)
+{
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+
+	rctx->cryptlen = req->cryptlen;
+
+	/* encrypt call chain:
+	 * - chacha_encrypt/done()
+	 * - poly_genkey/done()
+	 * - poly_init/done()
+	 * - poly_setkey/done()
+	 * - poly_ad/done()
+	 * - poly_adpad/done()
+	 * - poly_cipher/done()
+	 * - poly_cipherpad/done()
+	 * - poly_tail/done/continue()
+	 * - poly_copy_tag()
+	 */
+	return chacha_encrypt(req);
+}
+
+static int chachapoly_decrypt(struct aead_request *req)
+{
+	struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
+
+	if (req->cryptlen < POLY1305_DIGEST_SIZE)
+		return -EINVAL;
+	rctx->cryptlen = req->cryptlen - POLY1305_DIGEST_SIZE;
+
+	/* decrypt call chain:
+	 * - poly_genkey/done()
+	 * - poly_init/done()
+	 * - poly_setkey/done()
+	 * - poly_ad/done()
+	 * - poly_adpad/done()
+	 * - poly_cipher/done()
+	 * - poly_cipherpad/done()
+	 * - poly_tail/done/continue()
+	 * - chacha_decrypt/done()
+	 * - poly_verify_tag()
+	 */
+	return poly_genkey(req);
+}
+
+static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
+			     unsigned int keylen)
+{
+	struct chachapoly_ctx *ctx = crypto_aead_ctx(aead);
+	int err;
+
+	if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE)
+		return -EINVAL;
+
+	keylen -= ctx->saltlen;
+	memcpy(ctx->salt, key + keylen, ctx->saltlen);
+
+	crypto_ablkcipher_clear_flags(ctx->chacha, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(ctx->chacha, crypto_aead_get_flags(aead) &
+				    CRYPTO_TFM_REQ_MASK);
+
+	err = crypto_ablkcipher_setkey(ctx->chacha, key, keylen);
+	crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctx->chacha) &
+			      CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int chachapoly_setauthsize(struct crypto_aead *tfm,
+				  unsigned int authsize)
+{
+	if (authsize != POLY1305_DIGEST_SIZE)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int chachapoly_init(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct chachapoly_instance_ctx *ictx = crypto_instance_ctx(inst);
+	struct chachapoly_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_ablkcipher *chacha;
+	struct crypto_ahash *poly;
+	unsigned long align;
+
+	poly = crypto_spawn_ahash(&ictx->poly);
+	if (IS_ERR(poly))
+		return PTR_ERR(poly);
+
+	chacha = crypto_spawn_skcipher(&ictx->chacha);
+	if (IS_ERR(chacha)) {
+		crypto_free_ahash(poly);
+		return PTR_ERR(chacha);
+	}
+
+	ctx->chacha = chacha;
+	ctx->poly = poly;
+	ctx->saltlen = ictx->saltlen;
+
+	align = crypto_tfm_alg_alignmask(tfm);
+	align &= ~(crypto_tfm_ctx_alignment() - 1);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				align + offsetof(struct chachapoly_req_ctx, u) +
+				max(offsetof(struct chacha_req, req) +
+				    sizeof(struct ablkcipher_request) +
+				    crypto_ablkcipher_reqsize(chacha),
+				    offsetof(struct poly_req, req) +
+				    sizeof(struct ahash_request) +
+				    crypto_ahash_reqsize(poly)));
+
+	return 0;
+}
+
+static void chachapoly_exit(struct crypto_tfm *tfm)
+{
+	struct chachapoly_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_ahash(ctx->poly);
+	crypto_free_ablkcipher(ctx->chacha);
+}
+
+static struct crypto_instance *chachapoly_alloc(struct rtattr **tb,
+						const char *name,
+						unsigned int ivsize)
+{
+	struct crypto_attr_type *algt;
+	struct crypto_instance *inst;
+	struct crypto_alg *chacha;
+	struct crypto_alg *poly;
+	struct ahash_alg *poly_ahash;
+	struct chachapoly_instance_ctx *ctx;
+	const char *chacha_name, *poly_name;
+	int err;
+
+	if (ivsize > CHACHAPOLY_IV_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return ERR_CAST(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+		return ERR_PTR(-EINVAL);
+
+	chacha_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(chacha_name))
+		return ERR_CAST(chacha_name);
+	poly_name = crypto_attr_alg_name(tb[2]);
+	if (IS_ERR(poly_name))
+		return ERR_CAST(poly_name);
+
+	poly = crypto_find_alg(poly_name, &crypto_ahash_type,
+			       CRYPTO_ALG_TYPE_HASH,
+			       CRYPTO_ALG_TYPE_AHASH_MASK);
+	if (IS_ERR(poly))
+		return ERR_CAST(poly);
+
+	err = -ENOMEM;
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		goto out_put_poly;
+
+	ctx = crypto_instance_ctx(inst);
+	ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
+	poly_ahash = container_of(poly, struct ahash_alg, halg.base);
+	err = crypto_init_ahash_spawn(&ctx->poly, &poly_ahash->halg, inst);
+	if (err)
+		goto err_free_inst;
+
+	crypto_set_skcipher_spawn(&ctx->chacha, inst);
+	err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
+	if (err)
+		goto err_drop_poly;
+
+	chacha = crypto_skcipher_spawn_alg(&ctx->chacha);
+
+	err = -EINVAL;
+	/* Need 16-byte IV size, including Initial Block Counter value */
+	if (chacha->cra_ablkcipher.ivsize != CHACHA20_IV_SIZE)
+		goto out_drop_chacha;
+	/* Not a stream cipher? */
+	if (chacha->cra_blocksize != 1)
+		goto out_drop_chacha;
+
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s,%s)", name, chacha_name,
+		     poly_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_drop_chacha;
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s,%s)", name, chacha->cra_driver_name,
+		     poly->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		goto out_drop_chacha;
+
+	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
+	inst->alg.cra_flags |= (chacha->cra_flags |
+				poly->cra_flags) & CRYPTO_ALG_ASYNC;
+	inst->alg.cra_priority = (chacha->cra_priority +
+				  poly->cra_priority) / 2;
+	inst->alg.cra_blocksize = 1;
+	inst->alg.cra_alignmask = chacha->cra_alignmask | poly->cra_alignmask;
+	inst->alg.cra_type = &crypto_nivaead_type;
+	inst->alg.cra_aead.ivsize = ivsize;
+	inst->alg.cra_aead.maxauthsize = POLY1305_DIGEST_SIZE;
+	inst->alg.cra_ctxsize = sizeof(struct chachapoly_ctx) + ctx->saltlen;
+	inst->alg.cra_init = chachapoly_init;
+	inst->alg.cra_exit = chachapoly_exit;
+	inst->alg.cra_aead.encrypt = chachapoly_encrypt;
+	inst->alg.cra_aead.decrypt = chachapoly_decrypt;
+	inst->alg.cra_aead.setkey = chachapoly_setkey;
+	inst->alg.cra_aead.setauthsize = chachapoly_setauthsize;
+	inst->alg.cra_aead.geniv = "seqiv";
+
+out:
+	crypto_mod_put(poly);
+	return inst;
+
+out_drop_chacha:
+	crypto_drop_skcipher(&ctx->chacha);
+err_drop_poly:
+	crypto_drop_ahash(&ctx->poly);
+err_free_inst:
+	kfree(inst);
+out_put_poly:
+	inst = ERR_PTR(err);
+	goto out;
+}
+
+static struct crypto_instance *rfc7539_alloc(struct rtattr **tb)
+{
+	return chachapoly_alloc(tb, "rfc7539", 12);
+}
+
+static struct crypto_instance *rfc7539esp_alloc(struct rtattr **tb)
+{
+	return chachapoly_alloc(tb, "rfc7539esp", 8);
+}
+
+static void chachapoly_free(struct crypto_instance *inst)
+{
+	struct chachapoly_instance_ctx *ctx = crypto_instance_ctx(inst);
+
+	crypto_drop_skcipher(&ctx->chacha);
+	crypto_drop_ahash(&ctx->poly);
+	kfree(inst);
+}
+
+static struct crypto_template rfc7539_tmpl = {
+	.name = "rfc7539",
+	.alloc = rfc7539_alloc,
+	.free = chachapoly_free,
+	.module = THIS_MODULE,
+};
+
+static struct crypto_template rfc7539esp_tmpl = {
+	.name = "rfc7539esp",
+	.alloc = rfc7539esp_alloc,
+	.free = chachapoly_free,
+	.module = THIS_MODULE,
+};
+
+static int __init chacha20poly1305_module_init(void)
+{
+	int err;
+
+	err = crypto_register_template(&rfc7539_tmpl);
+	if (err)
+		return err;
+
+	err = crypto_register_template(&rfc7539esp_tmpl);
+	if (err)
+		crypto_unregister_template(&rfc7539_tmpl);
+
+	return err;
+}
+
+static void __exit chacha20poly1305_module_exit(void)
+{
+	crypto_unregister_template(&rfc7539esp_tmpl);
+	crypto_unregister_template(&rfc7539_tmpl);
+}
+
+module_init(chacha20poly1305_module_init);
+module_exit(chacha20poly1305_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha20-Poly1305 AEAD");
+MODULE_ALIAS_CRYPTO("chacha20poly1305");
+MODULE_ALIAS_CRYPTO("rfc7539");
+MODULE_ALIAS_CRYPTO("rfc7539esp");

+ 30 - 75
crypto/chainiv.c

@@ -80,44 +80,37 @@ unlock:
 	return err;
 }
 
-static int chainiv_givencrypt_first(struct skcipher_givcrypt_request *req)
+static int chainiv_init_common(struct crypto_tfm *tfm, char iv[])
 {
-	struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
-	struct chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv);
+	struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm);
 	int err = 0;
 
-	spin_lock_bh(&ctx->lock);
-	if (crypto_ablkcipher_crt(geniv)->givencrypt !=
-	    chainiv_givencrypt_first)
-		goto unlock;
-
-	crypto_ablkcipher_crt(geniv)->givencrypt = chainiv_givencrypt;
-	err = crypto_rng_get_bytes(crypto_default_rng, ctx->iv,
-				   crypto_ablkcipher_ivsize(geniv));
-
-unlock:
-	spin_unlock_bh(&ctx->lock);
-
-	if (err)
-		return err;
-
-	return chainiv_givencrypt(req);
-}
-
-static int chainiv_init_common(struct crypto_tfm *tfm)
-{
 	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request);
 
-	return skcipher_geniv_init(tfm);
+	if (iv) {
+		err = crypto_rng_get_bytes(crypto_default_rng, iv,
+					   crypto_ablkcipher_ivsize(geniv));
+		crypto_put_default_rng();
+	}
+
+	return err ?: skcipher_geniv_init(tfm);
 }
 
 static int chainiv_init(struct crypto_tfm *tfm)
 {
+	struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm);
 	struct chainiv_ctx *ctx = crypto_tfm_ctx(tfm);
+	char *iv;
 
 	spin_lock_init(&ctx->lock);
 
-	return chainiv_init_common(tfm);
+	iv = NULL;
+	if (!crypto_get_default_rng()) {
+		crypto_ablkcipher_crt(geniv)->givencrypt = chainiv_givencrypt;
+		iv = ctx->iv;
+	}
+
+	return chainiv_init_common(tfm, iv);
 }
 
 static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx)
@@ -205,33 +198,6 @@ postpone:
 	return async_chainiv_postpone_request(req);
 }
 
-static int async_chainiv_givencrypt_first(struct skcipher_givcrypt_request *req)
-{
-	struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
-	struct async_chainiv_ctx *ctx = crypto_ablkcipher_ctx(geniv);
-	int err = 0;
-
-	if (test_and_set_bit(CHAINIV_STATE_INUSE, &ctx->state))
-		goto out;
-
-	if (crypto_ablkcipher_crt(geniv)->givencrypt !=
-	    async_chainiv_givencrypt_first)
-		goto unlock;
-
-	crypto_ablkcipher_crt(geniv)->givencrypt = async_chainiv_givencrypt;
-	err = crypto_rng_get_bytes(crypto_default_rng, ctx->iv,
-				   crypto_ablkcipher_ivsize(geniv));
-
-unlock:
-	clear_bit(CHAINIV_STATE_INUSE, &ctx->state);
-
-	if (err)
-		return err;
-
-out:
-	return async_chainiv_givencrypt(req);
-}
-
 static void async_chainiv_do_postponed(struct work_struct *work)
 {
 	struct async_chainiv_ctx *ctx = container_of(work,
@@ -263,14 +229,23 @@ static void async_chainiv_do_postponed(struct work_struct *work)
 
 static int async_chainiv_init(struct crypto_tfm *tfm)
 {
+	struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm);
 	struct async_chainiv_ctx *ctx = crypto_tfm_ctx(tfm);
+	char *iv;
 
 	spin_lock_init(&ctx->lock);
 
 	crypto_init_queue(&ctx->queue, 100);
 	INIT_WORK(&ctx->postponed, async_chainiv_do_postponed);
 
-	return chainiv_init_common(tfm);
+	iv = NULL;
+	if (!crypto_get_default_rng()) {
+		crypto_ablkcipher_crt(geniv)->givencrypt =
+			async_chainiv_givencrypt;
+		iv = ctx->iv;
+	}
+
+	return chainiv_init_common(tfm, iv);
 }
 
 static void async_chainiv_exit(struct crypto_tfm *tfm)
@@ -288,21 +263,14 @@ static struct crypto_instance *chainiv_alloc(struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
 	struct crypto_instance *inst;
-	int err;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
 		return ERR_CAST(algt);
 
-	err = crypto_get_default_rng();
-	if (err)
-		return ERR_PTR(err);
-
 	inst = skcipher_geniv_alloc(&chainiv_tmpl, tb, 0, 0);
 	if (IS_ERR(inst))
-		goto put_rng;
-
-	inst->alg.cra_ablkcipher.givencrypt = chainiv_givencrypt_first;
+		goto out;
 
 	inst->alg.cra_init = chainiv_init;
 	inst->alg.cra_exit = skcipher_geniv_exit;
@@ -312,9 +280,6 @@ static struct crypto_instance *chainiv_alloc(struct rtattr **tb)
 	if (!crypto_requires_sync(algt->type, algt->mask)) {
 		inst->alg.cra_flags |= CRYPTO_ALG_ASYNC;
 
-		inst->alg.cra_ablkcipher.givencrypt =
-			async_chainiv_givencrypt_first;
-
 		inst->alg.cra_init = async_chainiv_init;
 		inst->alg.cra_exit = async_chainiv_exit;
 
@@ -325,22 +290,12 @@ static struct crypto_instance *chainiv_alloc(struct rtattr **tb)
 
 out:
 	return inst;
-
-put_rng:
-	crypto_put_default_rng();
-	goto out;
-}
-
-static void chainiv_free(struct crypto_instance *inst)
-{
-	skcipher_geniv_free(inst);
-	crypto_put_default_rng();
 }
 
 static struct crypto_template chainiv_tmpl = {
 	.name = "chainiv",
 	.alloc = chainiv_alloc,
-	.free = chainiv_free,
+	.free = skcipher_geniv_free,
 	.module = THIS_MODULE,
 };
 

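The shape of the change above: instead of lazily seeding on the first givencrypt call (and pinning the default RNG for the instance's whole lifetime), the IV salt is drawn once at init time and the RNG reference is dropped immediately, which is also what lets chainiv_free collapse into plain skcipher_geniv_free. A minimal sketch of that idiom, using only the crypto_rng calls from the hunk:

#include <crypto/rng.h>
#include <linux/types.h>

/* draw ivsize salt bytes from the system default RNG, holding the
 * reference only for the duration of the call; returns 0 or -errno */
static int geniv_salt_once(u8 *iv, unsigned int ivsize)
{
	int err = crypto_get_default_rng();

	if (err)
		return err;

	err = crypto_rng_get_bytes(crypto_default_rng, iv, ivsize);
	crypto_put_default_rng();

	return err;
}
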
+ 81 - 54
crypto/cryptd.c

@@ -295,6 +295,23 @@ static void cryptd_blkcipher_exit_tfm(struct crypto_tfm *tfm)
 	crypto_free_blkcipher(ctx->child);
 }
 
+static int cryptd_init_instance(struct crypto_instance *inst,
+				struct crypto_alg *alg)
+{
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "cryptd(%s)",
+		     alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+
+	inst->alg.cra_priority = alg->cra_priority + 50;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+
+	return 0;
+}
+
 static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
 				   unsigned int tail)
 {
@@ -308,17 +325,10 @@ static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
 
 	inst = (void *)(p + head);
 
-	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		     "cryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+	err = cryptd_init_instance(inst, alg);
+	if (err)
 		goto out_free_inst;
 
-	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-
-	inst->alg.cra_priority = alg->cra_priority + 50;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-
 out:
 	return p;
 
@@ -654,6 +664,24 @@ out_put_alg:
 	return err;
 }
 
+static int cryptd_aead_setkey(struct crypto_aead *parent,
+			      const u8 *key, unsigned int keylen)
+{
+	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(parent);
+	struct crypto_aead *child = ctx->child;
+
+	return crypto_aead_setkey(child, key, keylen);
+}
+
+static int cryptd_aead_setauthsize(struct crypto_aead *parent,
+				   unsigned int authsize)
+{
+	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(parent);
+	struct crypto_aead *child = ctx->child;
+
+	return crypto_aead_setauthsize(child, authsize);
+}
+
 static void cryptd_aead_crypt(struct aead_request *req,
 			struct crypto_aead *child,
 			int err,
@@ -715,27 +743,26 @@ static int cryptd_aead_decrypt_enqueue(struct aead_request *req)
 	return cryptd_aead_enqueue(req, cryptd_aead_decrypt);
 }
 
-static int cryptd_aead_init_tfm(struct crypto_tfm *tfm)
+static int cryptd_aead_init_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct aead_instance_ctx *ictx = crypto_instance_ctx(inst);
+	struct aead_instance *inst = aead_alg_instance(tfm);
+	struct aead_instance_ctx *ictx = aead_instance_ctx(inst);
 	struct crypto_aead_spawn *spawn = &ictx->aead_spawn;
-	struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *cipher;
 
 	cipher = crypto_spawn_aead(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	crypto_aead_set_flags(cipher, CRYPTO_TFM_REQ_MAY_SLEEP);
 	ctx->child = cipher;
-	tfm->crt_aead.reqsize = sizeof(struct cryptd_aead_request_ctx);
+	crypto_aead_set_reqsize(tfm, sizeof(struct cryptd_aead_request_ctx));
 	return 0;
 }
 
-static void cryptd_aead_exit_tfm(struct crypto_tfm *tfm)
+static void cryptd_aead_exit_tfm(struct crypto_aead *tfm)
 {
-	struct cryptd_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	crypto_free_aead(ctx->child);
 }
 
@@ -744,57 +771,57 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
 			      struct cryptd_queue *queue)
 {
 	struct aead_instance_ctx *ctx;
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
-	u32 type = CRYPTO_ALG_TYPE_AEAD;
-	u32 mask = CRYPTO_ALG_TYPE_MASK;
+	struct aead_instance *inst;
+	struct aead_alg *alg;
+	const char *name;
+	u32 type = 0;
+	u32 mask = 0;
 	int err;
 
 	cryptd_check_internal(tb, &type, &mask);
 
-	alg = crypto_get_attr_alg(tb, type, mask);
-        if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
 
-	inst = cryptd_alloc_instance(alg, 0, sizeof(*ctx));
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
 
-	ctx = crypto_instance_ctx(inst);
+	ctx = aead_instance_ctx(inst);
 	ctx->queue = queue;
 
-	err = crypto_init_spawn(&ctx->aead_spawn.base, alg, inst,
-			CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+	crypto_set_aead_spawn(&ctx->aead_spawn, aead_crypto_instance(inst));
+	err = crypto_grab_aead(&ctx->aead_spawn, name, type, mask);
 	if (err)
 		goto out_free_inst;
 
-	type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
-	if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
-		type |= CRYPTO_ALG_INTERNAL;
-	inst->alg.cra_flags = type;
-	inst->alg.cra_type = alg->cra_type;
-	inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
-	inst->alg.cra_init = cryptd_aead_init_tfm;
-	inst->alg.cra_exit = cryptd_aead_exit_tfm;
-	inst->alg.cra_aead.setkey      = alg->cra_aead.setkey;
-	inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize;
-	inst->alg.cra_aead.geniv       = alg->cra_aead.geniv;
-	inst->alg.cra_aead.ivsize      = alg->cra_aead.ivsize;
-	inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
-	inst->alg.cra_aead.encrypt     = cryptd_aead_encrypt_enqueue;
-	inst->alg.cra_aead.decrypt     = cryptd_aead_decrypt_enqueue;
-	inst->alg.cra_aead.givencrypt  = alg->cra_aead.givencrypt;
-	inst->alg.cra_aead.givdecrypt  = alg->cra_aead.givdecrypt;
+	alg = crypto_spawn_aead_alg(&ctx->aead_spawn);
+	err = cryptd_init_instance(aead_crypto_instance(inst), &alg->base);
+	if (err)
+		goto out_drop_aead;
 
-	err = crypto_register_instance(tmpl, inst);
+	inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
+				   (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
+	inst->alg.base.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
+
+	inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
+	inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);
+
+	inst->alg.init = cryptd_aead_init_tfm;
+	inst->alg.exit = cryptd_aead_exit_tfm;
+	inst->alg.setkey = cryptd_aead_setkey;
+	inst->alg.setauthsize = cryptd_aead_setauthsize;
+	inst->alg.encrypt = cryptd_aead_encrypt_enqueue;
+	inst->alg.decrypt = cryptd_aead_decrypt_enqueue;
+
+	err = aead_register_instance(tmpl, inst);
 	if (err) {
-		crypto_drop_spawn(&ctx->aead_spawn.base);
+out_drop_aead:
+		crypto_drop_aead(&ctx->aead_spawn);
 out_free_inst:
 		kfree(inst);
 	}
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
@@ -832,8 +859,8 @@ static void cryptd_free(struct crypto_instance *inst)
 		kfree(ahash_instance(inst));
 		return;
 	case CRYPTO_ALG_TYPE_AEAD:
-		crypto_drop_spawn(&aead_ctx->aead_spawn.base);
-		kfree(inst);
+		crypto_drop_aead(&aead_ctx->aead_spawn);
+		kfree(aead_instance(inst));
 		return;
 	default:
 		crypto_drop_spawn(&ctx->spawn);

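Consumers normally reach the queued AEAD path through the helpers declared in include/crypto/cryptd.h rather than by spelling out "cryptd(...)" themselves. A hedged sketch (the algorithm name is only an example; aesni-intel uses the same calls for its CRYPTO_ALG_INTERNAL "__driver" algorithms):

#include <crypto/cryptd.h>
#include <linux/err.h>

static int cryptd_aead_usage_sketch(void)
{
	struct cryptd_aead *ctfm;

	ctfm = cryptd_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(ctfm))
		return PTR_ERR(ctfm);

	/*
	 * &ctfm->base is an ordinary struct crypto_aead whose encrypt and
	 * decrypt funnel through cryptd_aead_encrypt_enqueue() /
	 * cryptd_aead_decrypt_enqueue() above; cryptd_aead_child() hands
	 * back the wrapped tfm for callers that can run synchronously.
	 */
	cryptd_free_aead(ctfm);
	return 0;
}
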
+ 39 - 0
crypto/crypto_null.c

@@ -25,6 +25,10 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
+static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
+static struct crypto_blkcipher *crypto_default_null_skcipher;
+static int crypto_default_null_skcipher_refcnt;
+
 static int null_compress(struct crypto_tfm *tfm, const u8 *src,
 			 unsigned int slen, u8 *dst, unsigned int *dlen)
 {
@@ -149,6 +153,41 @@ MODULE_ALIAS_CRYPTO("compress_null");
 MODULE_ALIAS_CRYPTO("digest_null");
 MODULE_ALIAS_CRYPTO("cipher_null");
 
+struct crypto_blkcipher *crypto_get_default_null_skcipher(void)
+{
+	struct crypto_blkcipher *tfm;
+
+	mutex_lock(&crypto_default_null_skcipher_lock);
+	tfm = crypto_default_null_skcipher;
+
+	if (!tfm) {
+		tfm = crypto_alloc_blkcipher("ecb(cipher_null)", 0, 0);
+		if (IS_ERR(tfm))
+			goto unlock;
+
+		crypto_default_null_skcipher = tfm;
+	}
+
+	crypto_default_null_skcipher_refcnt++;
+
+unlock:
+	mutex_unlock(&crypto_default_null_skcipher_lock);
+
+	return tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher);
+
+void crypto_put_default_null_skcipher(void)
+{
+	mutex_lock(&crypto_default_null_skcipher_lock);
+	if (!--crypto_default_null_skcipher_refcnt) {
+		crypto_free_blkcipher(crypto_default_null_skcipher);
+		crypto_default_null_skcipher = NULL;
+	}
+	mutex_unlock(&crypto_default_null_skcipher_lock);
+}
+EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher);
+
 static int __init crypto_null_mod_init(void)
 {
 	int ret = 0;

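The refcounted default instance added above exists so templates that only need a cheap scatterlist-to-scatterlist copy (gcm and echainiv further down both do this for the src != dst case) can share a single ecb(cipher_null) tfm instead of allocating their own. The idiom, reduced to a sketch (real users keep the reference in their tfm context rather than per call):

#include <crypto/null.h>
#include <linux/crypto.h>
#include <linux/err.h>

static int null_sg_copy(struct scatterlist *dst, struct scatterlist *src,
			unsigned int len)
{
	struct crypto_blkcipher *null = crypto_get_default_null_skcipher();
	struct blkcipher_desc desc = { .tfm = null };
	int err;

	if (IS_ERR(null))
		return PTR_ERR(null);

	/* "encrypting" with cipher_null is a plain copy */
	err = crypto_blkcipher_encrypt(&desc, dst, src, len);
	crypto_put_default_null_skcipher();

	return err;
}
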
+ 33 - 1
crypto/crypto_user.c

@@ -27,6 +27,8 @@
 #include <net/net_namespace.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
+#include <crypto/akcipher.h>
 
 #include "internal.h"
 
@@ -110,6 +112,21 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_akcipher rakcipher;
+
+	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
+		    sizeof(struct crypto_report_akcipher), &rakcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
 static int crypto_report_one(struct crypto_alg *alg,
 			     struct crypto_user_alg *ualg, struct sk_buff *skb)
 {
@@ -154,6 +171,12 @@ static int crypto_report_one(struct crypto_alg *alg,
 			goto nla_put_failure;
 
 		break;
+
+	case CRYPTO_ALG_TYPE_AKCIPHER:
+		if (crypto_report_akcipher(skb, alg))
+			goto nla_put_failure;
+
+		break;
 	}
 
 out:
@@ -450,13 +473,21 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
+static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct nlattr **attrs)
+{
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+	return crypto_del_default_rng();
+}
+
 #define MSGSIZE(type) sizeof(struct type)
 
 static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
 };
 
 static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
@@ -476,6 +507,7 @@ static const struct crypto_link {
 	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = { .doit = crypto_report,
 						       .dump = crypto_dump_report,
 						       .done = crypto_dump_report_done},
+	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
 };
 
 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)

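From userspace the new operation is a bare netlink header with no payload (hence the 0 entry in crypto_msg_min) on a NETLINK_CRYPTO socket, and it needs CAP_NET_ADMIN. A sketch, assuming the freshly-exported uapi header; reading back the ACK is omitted:

#include <linux/cryptouser.h>
#include <linux/netlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int request_del_default_rng(void)
{
	struct sockaddr_nl nl = { .nl_family = AF_NETLINK };
	struct nlmsghdr hdr;
	int sk, err;

	sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
	if (sk < 0)
		return -1;

	memset(&hdr, 0, sizeof(hdr));
	hdr.nlmsg_len = NLMSG_LENGTH(0);	/* header only, no payload */
	hdr.nlmsg_type = CRYPTO_MSG_DELRNG;
	hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;

	err = sendto(sk, &hdr, hdr.nlmsg_len, 0,
		     (struct sockaddr *)&nl, sizeof(nl));
	close(sk);

	return err < 0 ? -1 : 0;
}
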
+ 298 - 269
crypto/drbg.c

@@ -98,6 +98,7 @@
  */
 
 #include <crypto/drbg.h>
+#include <linux/kernel.h>
 
 /***************************************************************
  * Backend cipher definitions available to DRBG
@@ -190,6 +191,8 @@ static const struct drbg_core drbg_cores[] = {
 #endif /* CONFIG_CRYPTO_DRBG_HMAC */
 };
 
+static int drbg_uninstantiate(struct drbg_state *drbg);
+
 /******************************************************************
  * Generic helper functions
  ******************************************************************/
@@ -235,7 +238,7 @@ static bool drbg_fips_continuous_test(struct drbg_state *drbg,
 #ifdef CONFIG_CRYPTO_FIPS
 	int ret = 0;
 	/* skip test if we test the overall system */
-	if (drbg->test_data)
+	if (list_empty(&drbg->test_data.list))
 		return true;
 	/* only perform test in FIPS mode */
 	if (0 == fips_enabled)
@@ -487,7 +490,7 @@ static int drbg_ctr_df(struct drbg_state *drbg,
 
 out:
 	memset(iv, 0, drbg_blocklen(drbg));
-	memset(temp, 0, drbg_statelen(drbg));
+	memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg));
 	memset(pad, 0, drbg_blocklen(drbg));
 	return ret;
 }
@@ -1041,6 +1044,58 @@ static struct drbg_state_ops drbg_hash_ops = {
  * Functions common for DRBG implementations
  ******************************************************************/
 
+static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed,
+			      int reseed)
+{
+	int ret = drbg->d_ops->update(drbg, seed, reseed);
+
+	if (ret)
+		return ret;
+
+	drbg->seeded = true;
+	/* 10.1.1.2 / 10.1.1.3 step 5 */
+	drbg->reseed_ctr = 1;
+
+	return ret;
+}
+
+static void drbg_async_seed(struct work_struct *work)
+{
+	struct drbg_string data;
+	LIST_HEAD(seedlist);
+	struct drbg_state *drbg = container_of(work, struct drbg_state,
+					       seed_work);
+	unsigned int entropylen = drbg_sec_strength(drbg->core->flags);
+	unsigned char entropy[32];
+
+	BUG_ON(!entropylen);
+	BUG_ON(entropylen > sizeof(entropy));
+	get_random_bytes(entropy, entropylen);
+
+	drbg_string_fill(&data, entropy, entropylen);
+	list_add_tail(&data.list, &seedlist);
+
+	mutex_lock(&drbg->drbg_mutex);
+
+	/* If nonblocking pool is initialized, deactivate Jitter RNG */
+	crypto_free_rng(drbg->jent);
+	drbg->jent = NULL;
+
+	/* Set seeded to false so that if __drbg_seed fails the
+	 * next generate call will trigger a reseed.
+	 */
+	drbg->seeded = false;
+
+	__drbg_seed(drbg, &seedlist, true);
+
+	if (drbg->seeded)
+		drbg->reseed_threshold = drbg_max_requests(drbg);
+
+	mutex_unlock(&drbg->drbg_mutex);
+
+	memzero_explicit(entropy, entropylen);
+}
+
 /*
  * Seeding or reseeding of the DRBG
  *
@@ -1055,9 +1110,9 @@ static struct drbg_state_ops drbg_hash_ops = {
 static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
 		     bool reseed)
 {
-	int ret = 0;
-	unsigned char *entropy = NULL;
-	size_t entropylen = 0;
+	int ret;
+	unsigned char entropy[((32 + 16) * 2)];
+	unsigned int entropylen = drbg_sec_strength(drbg->core->flags);
 	struct drbg_string data1;
 	LIST_HEAD(seedlist);
 
@@ -1068,31 +1123,45 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
 		return -EINVAL;
 	}
 
-	if (drbg->test_data && drbg->test_data->testentropy) {
-		drbg_string_fill(&data1, drbg->test_data->testentropy->buf,
-				 drbg->test_data->testentropy->len);
+	if (list_empty(&drbg->test_data.list)) {
+		drbg_string_fill(&data1, drbg->test_data.buf,
+				 drbg->test_data.len);
 		pr_devel("DRBG: using test entropy\n");
 	} else {
 		/*
 		 * Gather entropy equal to the security strength of the DRBG.
 		 * With a derivation function, a nonce is required in addition
 		 * to the entropy. A nonce must be at least 1/2 of the security
-		 * strength of the DRBG in size. Thus, entropy * nonce is 3/2
+		 * strength of the DRBG in size. Thus, entropy + nonce is 3/2
 		 * of the strength. The consideration of a nonce is only
 		 * applicable during initial seeding.
 		 */
-		entropylen = drbg_sec_strength(drbg->core->flags);
-		if (!entropylen)
-			return -EFAULT;
+		BUG_ON(!entropylen);
 		if (!reseed)
 			entropylen = ((entropylen + 1) / 2) * 3;
-		pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n",
-			 entropylen);
-		entropy = kzalloc(entropylen, GFP_KERNEL);
-		if (!entropy)
-			return -ENOMEM;
+		BUG_ON((entropylen * 2) > sizeof(entropy));
+
+		/* Get seed from in-kernel /dev/urandom */
 		get_random_bytes(entropy, entropylen);
-		drbg_string_fill(&data1, entropy, entropylen);
+
+		if (!drbg->jent) {
+			drbg_string_fill(&data1, entropy, entropylen);
+			pr_devel("DRBG: (re)seeding with %u bytes of entropy\n",
+				 entropylen);
+		} else {
+			/* Get seed from Jitter RNG */
+			ret = crypto_rng_get_bytes(drbg->jent,
+						   entropy + entropylen,
+						   entropylen);
+			if (ret) {
+				pr_devel("DRBG: jent failed with %d\n", ret);
+				return ret;
+			}
+
+			drbg_string_fill(&data1, entropy, entropylen * 2);
+			pr_devel("DRBG: (re)seeding with %u bytes of entropy\n",
+				 entropylen * 2);
+		}
 	}
 	list_add_tail(&data1.list, &seedlist);
 
@@ -1111,16 +1180,10 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
 		memset(drbg->C, 0, drbg_statelen(drbg));
 	}
 
-	ret = drbg->d_ops->update(drbg, &seedlist, reseed);
-	if (ret)
-		goto out;
+	ret = __drbg_seed(drbg, &seedlist, reseed);
 
-	drbg->seeded = true;
-	/* 10.1.1.2 / 10.1.1.3 step 5 */
-	drbg->reseed_ctr = 1;
+	memzero_explicit(entropy, entropylen * 2);
 
-out:
-	kzfree(entropy);
 	return ret;
 }
 
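The fixed-size buffer that replaces the old kzalloc() deserves a quick sizing check (our arithmetic, not spelled out in the patch):

	/* max drbg_sec_strength() of any core = 32 bytes
	 *   initial seed + nonce : ((32 + 1) / 2) * 3 = 48 bytes
	 *   + Jitter RNG material: 48 * 2             = 96 bytes
	 *   buffer               : (32 + 16) * 2      = 96 bytes
	 * exactly enough, which is what the BUG_ON above guards.
	 */
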
@@ -1136,6 +1199,8 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
 	kzfree(drbg->scratchpad);
 	drbg->scratchpad = NULL;
 	drbg->reseed_ctr = 0;
+	drbg->d_ops = NULL;
+	drbg->core = NULL;
 #ifdef CONFIG_CRYPTO_FIPS
 	kzfree(drbg->prev);
 	drbg->prev = NULL;
@@ -1152,6 +1217,27 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 	int ret = -ENOMEM;
 	unsigned int sb_size = 0;
 
+	switch (drbg->core->flags & DRBG_TYPE_MASK) {
+#ifdef CONFIG_CRYPTO_DRBG_HMAC
+	case DRBG_HMAC:
+		drbg->d_ops = &drbg_hmac_ops;
+		break;
+#endif /* CONFIG_CRYPTO_DRBG_HMAC */
+#ifdef CONFIG_CRYPTO_DRBG_HASH
+	case DRBG_HASH:
+		drbg->d_ops = &drbg_hash_ops;
+		break;
+#endif /* CONFIG_CRYPTO_DRBG_HASH */
+#ifdef CONFIG_CRYPTO_DRBG_CTR
+	case DRBG_CTR:
+		drbg->d_ops = &drbg_ctr_ops;
+		break;
+#endif /* CONFIG_CRYPTO_DRBG_CTR */
+	default:
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
 	drbg->V = kmalloc(drbg_statelen(drbg), GFP_KERNEL);
 	if (!drbg->V)
 		goto err;
@@ -1181,87 +1267,14 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 		if (!drbg->scratchpad)
 			goto err;
 	}
-	spin_lock_init(&drbg->drbg_lock);
-	return 0;
-
-err:
-	drbg_dealloc_state(drbg);
-	return ret;
-}
-
-/*
- * Strategy to avoid holding long term locks: generate a shadow copy of DRBG
- * and perform all operations on this shadow copy. After finishing, restore
- * the updated state of the shadow copy into original drbg state. This way,
- * only the read and write operations of the original drbg state must be
- * locked
- */
-static inline void drbg_copy_drbg(struct drbg_state *src,
-				  struct drbg_state *dst)
-{
-	if (!src || !dst)
-		return;
-	memcpy(dst->V, src->V, drbg_statelen(src));
-	memcpy(dst->C, src->C, drbg_statelen(src));
-	dst->reseed_ctr = src->reseed_ctr;
-	dst->seeded = src->seeded;
-	dst->pr = src->pr;
-#ifdef CONFIG_CRYPTO_FIPS
-	dst->fips_primed = src->fips_primed;
-	memcpy(dst->prev, src->prev, drbg_blocklen(src));
-#endif
-	/*
-	 * Not copied:
-	 * scratchpad is initialized drbg_alloc_state;
-	 * priv_data is initialized with call to crypto_init;
-	 * d_ops and core are set outside, as these parameters are const;
-	 * test_data is set outside to prevent it being copied back.
-	 */
-}
-
-static int drbg_make_shadow(struct drbg_state *drbg, struct drbg_state **shadow)
-{
-	int ret = -ENOMEM;
-	struct drbg_state *tmp = NULL;
-
-	tmp = kzalloc(sizeof(struct drbg_state), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-
-	/* read-only data as they are defined as const, no lock needed */
-	tmp->core = drbg->core;
-	tmp->d_ops = drbg->d_ops;
 
-	ret = drbg_alloc_state(tmp);
-	if (ret)
-		goto err;
-
-	spin_lock_bh(&drbg->drbg_lock);
-	drbg_copy_drbg(drbg, tmp);
-	/* only make a link to the test buffer, as we only read that data */
-	tmp->test_data = drbg->test_data;
-	spin_unlock_bh(&drbg->drbg_lock);
-	*shadow = tmp;
 	return 0;
 
 err:
-	kzfree(tmp);
+	drbg_dealloc_state(drbg);
 	return ret;
 }
 
-static void drbg_restore_shadow(struct drbg_state *drbg,
-				struct drbg_state **shadow)
-{
-	struct drbg_state *tmp = *shadow;
-
-	spin_lock_bh(&drbg->drbg_lock);
-	drbg_copy_drbg(tmp, drbg);
-	spin_unlock_bh(&drbg->drbg_lock);
-	drbg_dealloc_state(tmp);
-	kzfree(tmp);
-	*shadow = NULL;
-}
-
 /*************************************************************************
  * DRBG interface functions
  *************************************************************************/
@@ -1287,14 +1300,12 @@ static int drbg_generate(struct drbg_state *drbg,
 			 struct drbg_string *addtl)
 {
 	int len = 0;
-	struct drbg_state *shadow = NULL;
 	LIST_HEAD(addtllist);
-	struct drbg_string timestamp;
-	union {
-		cycles_t cycles;
-		unsigned char char_cycles[sizeof(cycles_t)];
-	} now;
 
+	if (!drbg->core) {
+		pr_devel("DRBG: not yet seeded\n");
+		return -EINVAL;
+	}
 	if (0 == buflen || !buf) {
 		pr_devel("DRBG: no output buffer provided\n");
 		return -EINVAL;
@@ -1304,15 +1315,9 @@ static int drbg_generate(struct drbg_state *drbg,
 		return -EINVAL;
 	}
 
-	len = drbg_make_shadow(drbg, &shadow);
-	if (len) {
-		pr_devel("DRBG: shadow copy cannot be generated\n");
-		return len;
-	}
-
 	/* 9.3.1 step 2 */
 	len = -EINVAL;
-	if (buflen > (drbg_max_request_bytes(shadow))) {
+	if (buflen > (drbg_max_request_bytes(drbg))) {
 		pr_devel("DRBG: requested random numbers too large %u\n",
 			 buflen);
 		goto err;
@@ -1321,7 +1326,7 @@ static int drbg_generate(struct drbg_state *drbg,
 	/* 9.3.1 step 3 is implicit with the chosen DRBG */
 
 	/* 9.3.1 step 4 */
-	if (addtl && addtl->len > (drbg_max_addtl(shadow))) {
+	if (addtl && addtl->len > (drbg_max_addtl(drbg))) {
 		pr_devel("DRBG: additional information string too long %zu\n",
 			 addtl->len);
 		goto err;
@@ -1332,46 +1337,29 @@ static int drbg_generate(struct drbg_state *drbg,
 	 * 9.3.1 step 6 and 9 supplemented by 9.3.2 step c is implemented
 	 * here. The spec is a bit convoluted here, we make it simpler.
 	 */
-	if ((drbg_max_requests(shadow)) < shadow->reseed_ctr)
-		shadow->seeded = false;
+	if (drbg->reseed_threshold < drbg->reseed_ctr)
+		drbg->seeded = false;
 
-	/* allocate cipher handle */
-	len = shadow->d_ops->crypto_init(shadow);
-	if (len)
-		goto err;
-
-	if (shadow->pr || !shadow->seeded) {
+	if (drbg->pr || !drbg->seeded) {
 		pr_devel("DRBG: reseeding before generation (prediction "
 			 "resistance: %s, state %s)\n",
 			 drbg->pr ? "true" : "false",
 			 drbg->seeded ? "seeded" : "unseeded");
 		/* 9.3.1 steps 7.1 through 7.3 */
-		len = drbg_seed(shadow, addtl, true);
+		len = drbg_seed(drbg, addtl, true);
 		if (len)
 			goto err;
 		/* 9.3.1 step 7.4 */
 		addtl = NULL;
 	}
 
-	/*
-	 * Mix the time stamp into the DRBG state if the DRBG is not in
-	 * test mode. If there are two callers invoking the DRBG at the same
-	 * time, i.e. before the first caller merges its shadow state back,
-	 * both callers would obtain the same random number stream without
-	 * changing the state here.
-	 */
-	if (!drbg->test_data) {
-		now.cycles = random_get_entropy();
-		drbg_string_fill(&timestamp, now.char_cycles, sizeof(cycles_t));
-		list_add_tail(&timestamp.list, &addtllist);
-	}
 	if (addtl && 0 < addtl->len)
 		list_add_tail(&addtl->list, &addtllist);
 	/* 9.3.1 step 8 and 10 */
-	len = shadow->d_ops->generate(shadow, buf, buflen, &addtllist);
+	len = drbg->d_ops->generate(drbg, buf, buflen, &addtllist);
 
 	/* 10.1.1.4 step 6, 10.1.2.5 step 7, 10.2.1.5.2 step 7 */
-	shadow->reseed_ctr++;
+	drbg->reseed_ctr++;
 	if (0 >= len)
 		goto err;
 
@@ -1391,7 +1379,7 @@ static int drbg_generate(struct drbg_state *drbg,
 	 * case somebody has a need to implement the test of 11.3.3.
 	 */
 #if 0
-	if (shadow->reseed_ctr && !(shadow->reseed_ctr % 4096)) {
+	if (drbg->reseed_ctr && !(drbg->reseed_ctr % 4096)) {
 		int err = 0;
 		pr_devel("DRBG: start to perform self test\n");
 		if (drbg->core->flags & DRBG_HMAC)
@@ -1410,8 +1398,6 @@ static int drbg_generate(struct drbg_state *drbg,
 			 * are returned when reusing this DRBG cipher handle
 			 */
 			drbg_uninstantiate(drbg);
-			drbg_dealloc_state(shadow);
-			kzfree(shadow);
 			return 0;
 		} else {
 			pr_devel("DRBG: self test successful\n");
@@ -1425,8 +1411,6 @@ static int drbg_generate(struct drbg_state *drbg,
 	 */
 	len = 0;
 err:
-	shadow->d_ops->crypto_fini(shadow);
-	drbg_restore_shadow(drbg, &shadow);
 	return len;
 }
 
@@ -1442,19 +1426,68 @@ static int drbg_generate_long(struct drbg_state *drbg,
 			      unsigned char *buf, unsigned int buflen,
 			      struct drbg_string *addtl)
 {
-	int len = 0;
+	unsigned int len = 0;
 	unsigned int slice = 0;
 	do {
-		int tmplen = 0;
+		int err = 0;
 		unsigned int chunk = 0;
 		slice = ((buflen - len) / drbg_max_request_bytes(drbg));
 		chunk = slice ? drbg_max_request_bytes(drbg) : (buflen - len);
-		tmplen = drbg_generate(drbg, buf + len, chunk, addtl);
-		if (0 >= tmplen)
-			return tmplen;
-		len += tmplen;
+		mutex_lock(&drbg->drbg_mutex);
+		err = drbg_generate(drbg, buf + len, chunk, addtl);
+		mutex_unlock(&drbg->drbg_mutex);
+		if (0 > err)
+			return err;
+		len += chunk;
 	} while (slice > 0 && (len < buflen));
-	return len;
+	return 0;
+}
+
+static void drbg_schedule_async_seed(struct random_ready_callback *rdy)
+{
+	struct drbg_state *drbg = container_of(rdy, struct drbg_state,
+					       random_ready);
+
+	schedule_work(&drbg->seed_work);
+}
+
+static int drbg_prepare_hrng(struct drbg_state *drbg)
+{
+	int err;
+
+	/* We do not need an HRNG in test mode. */
+	if (list_empty(&drbg->test_data.list))
+		return 0;
+
+	INIT_WORK(&drbg->seed_work, drbg_async_seed);
+
+	drbg->random_ready.owner = THIS_MODULE;
+	drbg->random_ready.func = drbg_schedule_async_seed;
+
+	err = add_random_ready_callback(&drbg->random_ready);
+
+	switch (err) {
+	case 0:
+		break;
+
+	case -EALREADY:
+		err = 0;
+		/* fall through */
+
+	default:
+		drbg->random_ready.func = NULL;
+		return err;
+	}
+
+	drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0);
+
+	/*
+	 * Require frequent reseeds until the seed source is fully
+	 * initialized.
+	 */
+	drbg->reseed_threshold = 50;
+
+	return err;
 }
 
 /*
@@ -1477,32 +1510,12 @@ static int drbg_generate_long(struct drbg_state *drbg,
 static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers,
 			    int coreref, bool pr)
 {
-	int ret = -ENOMEM;
+	int ret;
+	bool reseed = true;
 
 	pr_devel("DRBG: Initializing DRBG core %d with prediction resistance "
 		 "%s\n", coreref, pr ? "enabled" : "disabled");
-	drbg->core = &drbg_cores[coreref];
-	drbg->pr = pr;
-	drbg->seeded = false;
-	switch (drbg->core->flags & DRBG_TYPE_MASK) {
-#ifdef CONFIG_CRYPTO_DRBG_HMAC
-	case DRBG_HMAC:
-		drbg->d_ops = &drbg_hmac_ops;
-		break;
-#endif /* CONFIG_CRYPTO_DRBG_HMAC */
-#ifdef CONFIG_CRYPTO_DRBG_HASH
-	case DRBG_HASH:
-		drbg->d_ops = &drbg_hash_ops;
-		break;
-#endif /* CONFIG_CRYPTO_DRBG_HASH */
-#ifdef CONFIG_CRYPTO_DRBG_CTR
-	case DRBG_CTR:
-		drbg->d_ops = &drbg_ctr_ops;
-		break;
-#endif /* CONFIG_CRYPTO_DRBG_CTR */
-	default:
-		return -EOPNOTSUPP;
-	}
+	mutex_lock(&drbg->drbg_mutex);
 
 	/* 9.1 step 1 is implicit with the selected DRBG type */
 
@@ -1514,22 +1527,52 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers,
 
 	/* 9.1 step 4 is implicit in  drbg_sec_strength */
 
-	ret = drbg_alloc_state(drbg);
-	if (ret)
-		return ret;
+	if (!drbg->core) {
+		drbg->core = &drbg_cores[coreref];
+		drbg->pr = pr;
+		drbg->seeded = false;
+		drbg->reseed_threshold = drbg_max_requests(drbg);
 
-	ret = -EFAULT;
-	if (drbg->d_ops->crypto_init(drbg))
-		goto err;
-	ret = drbg_seed(drbg, pers, false);
-	drbg->d_ops->crypto_fini(drbg);
-	if (ret)
-		goto err;
+		ret = drbg_alloc_state(drbg);
+		if (ret)
+			goto unlock;
 
-	return 0;
+		ret = -EFAULT;
+		if (drbg->d_ops->crypto_init(drbg))
+			goto err;
+
+		ret = drbg_prepare_hrng(drbg);
+		if (ret)
+			goto free_everything;
+
+		if (IS_ERR(drbg->jent)) {
+			ret = PTR_ERR(drbg->jent);
+			drbg->jent = NULL;
+			if (fips_enabled || ret != -ENOENT)
+				goto free_everything;
+			pr_info("DRBG: Continuing without Jitter RNG\n");
+		}
+
+		reseed = false;
+	}
+
+	ret = drbg_seed(drbg, pers, reseed);
+
+	if (ret && !reseed)
+		goto free_everything;
+
+	mutex_unlock(&drbg->drbg_mutex);
+	return ret;
 
 err:
 	drbg_dealloc_state(drbg);
+unlock:
+	mutex_unlock(&drbg->drbg_mutex);
+	return ret;
+
+free_everything:
+	mutex_unlock(&drbg->drbg_mutex);
+	drbg_uninstantiate(drbg);
 	return ret;
 }
 
@@ -1544,10 +1587,17 @@ err:
  */
 static int drbg_uninstantiate(struct drbg_state *drbg)
 {
-	spin_lock_bh(&drbg->drbg_lock);
+	if (drbg->random_ready.func) {
+		del_random_ready_callback(&drbg->random_ready);
+		cancel_work_sync(&drbg->seed_work);
+		crypto_free_rng(drbg->jent);
+		drbg->jent = NULL;
+	}
+
+	if (drbg->d_ops)
+		drbg->d_ops->crypto_fini(drbg);
 	drbg_dealloc_state(drbg);
 	/* no scrubbing of test_data -- this shall survive an uninstantiate */
-	spin_unlock_bh(&drbg->drbg_lock);
 	return 0;
 }
 
@@ -1555,16 +1605,17 @@ static int drbg_uninstantiate(struct drbg_state *drbg)
  * Helper function for setting the test data in the DRBG
  *
  * @drbg DRBG state handle
- * @test_data test data to sets
+ * @data test data
+ * @len test data length
  */
-static inline void drbg_set_testdata(struct drbg_state *drbg,
-				     struct drbg_test_data *test_data)
+static void drbg_kcapi_set_entropy(struct crypto_rng *tfm,
+				   const u8 *data, unsigned int len)
 {
-	if (!test_data || !test_data->testentropy)
-		return;
-	spin_lock_bh(&drbg->drbg_lock);
-	drbg->test_data = test_data;
-	spin_unlock_bh(&drbg->drbg_lock);
+	struct drbg_state *drbg = crypto_rng_ctx(tfm);
+
+	mutex_lock(&drbg->drbg_mutex);
+	drbg_string_fill(&drbg->test_data, data, len);
+	mutex_unlock(&drbg->drbg_mutex);
 }
 
 /***************************************************************
@@ -1584,7 +1635,8 @@ static int drbg_init_hash_kernel(struct drbg_state *drbg)
 
 	tfm = crypto_alloc_shash(drbg->core->backend_cra_name, 0, 0);
 	if (IS_ERR(tfm)) {
-		pr_info("DRBG: could not allocate digest TFM handle\n");
+		pr_info("DRBG: could not allocate digest TFM handle: %s\n",
+				drbg->core->backend_cra_name);
 		return PTR_ERR(tfm);
 	}
 	BUG_ON(drbg_blocklen(drbg) != crypto_shash_digestsize(tfm));
@@ -1635,7 +1687,8 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
 
 	tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0);
 	if (IS_ERR(tfm)) {
-		pr_info("DRBG: could not allocate cipher TFM handle\n");
+		pr_info("DRBG: could not allocate cipher TFM handle: %s\n",
+				drbg->core->backend_cra_name);
 		return PTR_ERR(tfm);
 	}
 	BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm));
@@ -1714,15 +1767,10 @@ static inline void drbg_convert_tfm_core(const char *cra_driver_name,
 static int drbg_kcapi_init(struct crypto_tfm *tfm)
 {
 	struct drbg_state *drbg = crypto_tfm_ctx(tfm);
-	bool pr = false;
-	int coreref = 0;
 
-	drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm), &coreref, &pr);
-	/*
-	 * when personalization string is needed, the caller must call reset
-	 * and provide the personalization string as seed information
-	 */
-	return drbg_instantiate(drbg, NULL, coreref, pr);
+	mutex_init(&drbg->drbg_mutex);
+
+	return 0;
 }
 
 static void drbg_kcapi_cleanup(struct crypto_tfm *tfm)
@@ -1734,65 +1782,49 @@ static void drbg_kcapi_cleanup(struct crypto_tfm *tfm)
  * Generate random numbers invoked by the kernel crypto API:
  * The API of the kernel crypto API is extended as follows:
  *
- * If dlen is larger than zero, rdata is interpreted as the output buffer
- * where random data is to be stored.
- *
- * If dlen is zero, rdata is interpreted as a pointer to a struct drbg_gen
- * which holds the additional information string that is used for the
- * DRBG generation process. The output buffer that is to be used to store
- * data is also pointed to by struct drbg_gen.
+ * src is additional input supplied to the RNG.
+ * slen is the length of src.
+ * dst is the output buffer where random data is to be stored.
+ * dlen is the length of dst.
  */
-static int drbg_kcapi_random(struct crypto_rng *tfm, u8 *rdata,
-			     unsigned int dlen)
+static int drbg_kcapi_random(struct crypto_rng *tfm,
+			     const u8 *src, unsigned int slen,
+			     u8 *dst, unsigned int dlen)
 {
 	struct drbg_state *drbg = crypto_rng_ctx(tfm);
-	if (0 < dlen) {
-		return drbg_generate_long(drbg, rdata, dlen, NULL);
-	} else {
-		struct drbg_gen *data = (struct drbg_gen *)rdata;
-		struct drbg_string addtl;
-		/* catch NULL pointer */
-		if (!data)
-			return 0;
-		drbg_set_testdata(drbg, data->test_data);
+	struct drbg_string *addtl = NULL;
+	struct drbg_string string;
+
+	if (slen) {
 		/* linked list variable is now local to allow modification */
-		drbg_string_fill(&addtl, data->addtl->buf, data->addtl->len);
-		return drbg_generate_long(drbg, data->outbuf, data->outlen,
-					  &addtl);
+		drbg_string_fill(&string, src, slen);
+		addtl = &string;
 	}
+
+	return drbg_generate_long(drbg, dst, dlen, addtl);
 }
 
 /*
- * Reset the DRBG invoked by the kernel crypto API
- * The reset implies a full re-initialization of the DRBG. Similar to the
- * generate function of drbg_kcapi_random, this function extends the
- * kernel crypto API interface with struct drbg_gen
+ * Seed the DRBG invoked by the kernel crypto API
  */
-static int drbg_kcapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int drbg_kcapi_seed(struct crypto_rng *tfm,
+			   const u8 *seed, unsigned int slen)
 {
 	struct drbg_state *drbg = crypto_rng_ctx(tfm);
 	struct crypto_tfm *tfm_base = crypto_rng_tfm(tfm);
 	bool pr = false;
-	struct drbg_string seed_string;
+	struct drbg_string string;
+	struct drbg_string *seed_string = NULL;
 	int coreref = 0;
 
-	drbg_uninstantiate(drbg);
 	drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm_base), &coreref,
 			      &pr);
 	if (0 < slen) {
-		drbg_string_fill(&seed_string, seed, slen);
-		return drbg_instantiate(drbg, &seed_string, coreref, pr);
-	} else {
-		struct drbg_gen *data = (struct drbg_gen *)seed;
-		/* allow invocation of API call with NULL, 0 */
-		if (!data)
-			return drbg_instantiate(drbg, NULL, coreref, pr);
-		drbg_set_testdata(drbg, data->test_data);
-		/* linked list variable is now local to allow modification */
-		drbg_string_fill(&seed_string, data->addtl->buf,
-				 data->addtl->len);
-		return drbg_instantiate(drbg, &seed_string, coreref, pr);
+		drbg_string_fill(&string, seed, slen);
+		seed_string = &string;
 	}
+
+	return drbg_instantiate(drbg, seed_string, coreref, pr);
 }
 
 /***************************************************************
@@ -1811,7 +1843,6 @@ static int drbg_kcapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
  */
 static inline int __init drbg_healthcheck_sanity(void)
 {
-#ifdef CONFIG_CRYPTO_FIPS
 	int len = 0;
 #define OUTBUFLEN 16
 	unsigned char buf[OUTBUFLEN];
@@ -1839,6 +1870,8 @@ static inline int __init drbg_healthcheck_sanity(void)
 	if (!drbg)
 		return -ENOMEM;
 
+	mutex_init(&drbg->drbg_mutex);
+
 	/*
 	 * if the following tests fail, it is likely that there is a buffer
 	 * overflow as buf is much smaller than the requested or provided
@@ -1877,37 +1910,33 @@ static inline int __init drbg_healthcheck_sanity(void)
 outbuf:
 	kzfree(drbg);
 	return rc;
-#else /* CONFIG_CRYPTO_FIPS */
-	return 0;
-#endif /* CONFIG_CRYPTO_FIPS */
 }
 
-static struct crypto_alg drbg_algs[22];
+static struct rng_alg drbg_algs[22];
 
 /*
  * Fill the array drbg_algs used to register the different DRBGs
  * with the kernel crypto API. To fill the array, the information
  * from drbg_cores[] is used.
  */
-static inline void __init drbg_fill_array(struct crypto_alg *alg,
+static inline void __init drbg_fill_array(struct rng_alg *alg,
 					  const struct drbg_core *core, int pr)
 {
 	int pos = 0;
-	static int priority = 100;
+	static int priority = 200;
 
-	memset(alg, 0, sizeof(struct crypto_alg));
-	memcpy(alg->cra_name, "stdrng", 6);
+	memcpy(alg->base.cra_name, "stdrng", 6);
 	if (pr) {
-		memcpy(alg->cra_driver_name, "drbg_pr_", 8);
+		memcpy(alg->base.cra_driver_name, "drbg_pr_", 8);
 		pos = 8;
 	} else {
-		memcpy(alg->cra_driver_name, "drbg_nopr_", 10);
+		memcpy(alg->base.cra_driver_name, "drbg_nopr_", 10);
 		pos = 10;
 	}
-	memcpy(alg->cra_driver_name + pos, core->cra_name,
+	memcpy(alg->base.cra_driver_name + pos, core->cra_name,
 	       strlen(core->cra_name));
 
-	alg->cra_priority = priority;
+	alg->base.cra_priority = priority;
 	priority++;
 	/*
 	 * If FIPS mode enabled, the selected DRBG shall have the
@@ -1915,17 +1944,16 @@ static inline void __init drbg_fill_array(struct crypto_alg *alg,
 	 * it is selected.
 	 */
 	if (fips_enabled)
-		alg->cra_priority += 200;
-
-	alg->cra_flags		= CRYPTO_ALG_TYPE_RNG;
-	alg->cra_ctxsize 	= sizeof(struct drbg_state);
-	alg->cra_type		= &crypto_rng_type;
-	alg->cra_module		= THIS_MODULE;
-	alg->cra_init		= drbg_kcapi_init;
-	alg->cra_exit		= drbg_kcapi_cleanup;
-	alg->cra_u.rng.rng_make_random	= drbg_kcapi_random;
-	alg->cra_u.rng.rng_reset	= drbg_kcapi_reset;
-	alg->cra_u.rng.seedsize	= 0;
+		alg->base.cra_priority += 200;
+
+	alg->base.cra_ctxsize 	= sizeof(struct drbg_state);
+	alg->base.cra_module	= THIS_MODULE;
+	alg->base.cra_init	= drbg_kcapi_init;
+	alg->base.cra_exit	= drbg_kcapi_cleanup;
+	alg->generate		= drbg_kcapi_random;
+	alg->seed		= drbg_kcapi_seed;
+	alg->set_ent		= drbg_kcapi_set_entropy;
+	alg->seedsize		= 0;
 }
 
 static int __init drbg_init(void)
@@ -1958,12 +1986,12 @@ static int __init drbg_init(void)
 		drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 1);
 	for (j = 0; ARRAY_SIZE(drbg_cores) > j; j++, i++)
 		drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 0);
-	return crypto_register_algs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
+	return crypto_register_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
 }
 
 static void __exit drbg_exit(void)
 {
-	crypto_unregister_algs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
+	crypto_unregister_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
 }
 
 module_init(drbg_init);
@@ -1984,3 +2012,4 @@ MODULE_DESCRIPTION("NIST SP800-90A Deterministic Random Bit Generator (DRBG) "
 		   CRYPTO_DRBG_HASH_STRING
 		   CRYPTO_DRBG_HMAC_STRING
 		   CRYPTO_DRBG_CTR_STRING);
+MODULE_ALIAS_CRYPTO("stdrng");

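With the move from struct crypto_alg to struct rng_alg, a DRBG instance is driven entirely through the generic crypto_rng_* calls: crypto_rng_reset() lands in drbg_kcapi_seed(), generation in drbg_kcapi_random(), and ->set_ent only carries testmgr-supplied entropy. An in-kernel sketch (the instance name is one of the drivers registered above; error handling abbreviated):

#include <crypto/rng.h>
#include <linux/err.h>

static int drbg_usage_sketch(u8 *out, unsigned int outlen)
{
	static const u8 pers[] = "optional personalization";
	struct crypto_rng *rng;
	int err;

	rng = crypto_alloc_rng("drbg_nopr_hmac_sha256", 0, 0);
	if (IS_ERR(rng))
		return PTR_ERR(rng);

	/* instantiate: seeds from urandom, plus Jitter RNG when present */
	err = crypto_rng_reset(rng, pers, sizeof(pers) - 1);
	if (!err)
		err = crypto_rng_get_bytes(rng, out, outlen);

	crypto_free_rng(rng);
	return err;
}
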
+ 312 - 0
crypto/echainiv.c

@@ -0,0 +1,312 @@
+/*
+ * echainiv: Encrypted Chain IV Generator
+ *
+ * This generator generates an IV based on a sequence number by xoring it
+ * with a salt and then encrypting it with the same key as used to encrypt
+ * the plain text.  This algorithm requires that the block size be equal
+ * to the IV size.  It is mainly useful for CBC.
+ *
+ * This generator can only be used by algorithms where authentication
+ * is performed after encryption (i.e., authenc).
+ *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/geniv.h>
+#include <crypto/null.h>
+#include <crypto/rng.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#define MAX_IV_SIZE 16
+
+struct echainiv_ctx {
+	/* aead_geniv_ctx must be the first element */
+	struct aead_geniv_ctx geniv;
+	struct crypto_blkcipher *null;
+	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
+};
+
+static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
+
+/* We don't care if we get preempted and read/write IVs from the next CPU. */
+static void echainiv_read_iv(u8 *dst, unsigned size)
+{
+	u32 *a = (u32 *)dst;
+	u32 __percpu *b = echainiv_iv;
+
+	for (; size >= 4; size -= 4) {
+		*a++ = this_cpu_read(*b);
+		b++;
+	}
+}
+
+static void echainiv_write_iv(const u8 *src, unsigned size)
+{
+	const u32 *a = (const u32 *)src;
+	u32 __percpu *b = echainiv_iv;
+
+	for (; size >= 4; size -= 4) {
+		this_cpu_write(*b, *a);
+		a++;
+		b++;
+	}
+}
+
+static void echainiv_encrypt_complete2(struct aead_request *req, int err)
+{
+	struct aead_request *subreq = aead_request_ctx(req);
+	struct crypto_aead *geniv;
+	unsigned int ivsize;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	if (err)
+		goto out;
+
+	geniv = crypto_aead_reqtfm(req);
+	ivsize = crypto_aead_ivsize(geniv);
+
+	echainiv_write_iv(subreq->iv, ivsize);
+
+	if (req->iv != subreq->iv)
+		memcpy(req->iv, subreq->iv, ivsize);
+
+out:
+	if (req->iv != subreq->iv)
+		kzfree(subreq->iv);
+}
+
+static void echainiv_encrypt_complete(struct crypto_async_request *base,
+					 int err)
+{
+	struct aead_request *req = base->data;
+
+	echainiv_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static int echainiv_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	u8 *info;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+	int err;
+
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->geniv.child);
+
+	compl = echainiv_encrypt_complete;
+	data = req;
+	info = req->iv;
+
+	if (req->src != req->dst) {
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(
+			&desc, req->dst, req->src,
+			req->assoclen + req->cryptlen);
+		if (err)
+			return err;
+	}
+
+	if (unlikely(!IS_ALIGNED((unsigned long)info,
+				 crypto_aead_alignmask(geniv) + 1))) {
+		info = kmalloc(ivsize, req->base.flags &
+				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+								  GFP_ATOMIC);
+		if (!info)
+			return -ENOMEM;
+
+		memcpy(info, req->iv, ivsize);
+	}
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->dst, req->dst,
+			       req->cryptlen - ivsize, info);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	crypto_xor(info, ctx->salt, ivsize);
+	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+	echainiv_read_iv(info, ivsize);
+
+	err = crypto_aead_encrypt(subreq);
+	echainiv_encrypt_complete2(req, err);
+	return err;
+}
+
+static int echainiv_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->geniv.child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
+	if (req->src != req->dst)
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->assoclen, ivsize, 1);
+
+	return crypto_aead_decrypt(subreq);
+}
+
+static int echainiv_init(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
+
+	spin_lock_init(&ctx->geniv.lock);
+
+	crypto_aead_set_reqsize(geniv, sizeof(struct aead_request));
+
+	err = crypto_get_default_rng();
+	if (err)
+		goto out;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(geniv));
+	crypto_put_default_rng();
+	if (err)
+		goto out;
+
+	ctx->null = crypto_get_default_null_skcipher();
+	err = PTR_ERR(ctx->null);
+	if (IS_ERR(ctx->null))
+		goto out;
+
+	err = aead_geniv_init(tfm);
+	if (err)
+		goto drop_null;
+
+	ctx->geniv.child = geniv->child;
+	geniv->child = geniv;
+
+out:
+	return err;
+
+drop_null:
+	crypto_put_default_null_skcipher();
+	goto out;
+}
+
+static void echainiv_exit(struct crypto_tfm *tfm)
+{
+	struct echainiv_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->geniv.child);
+	crypto_put_default_null_skcipher();
+}
+
+static int echainiv_aead_create(struct crypto_template *tmpl,
+				struct rtattr **tb)
+{
+	struct aead_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct aead_alg *alg;
+	int err;
+
+	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+
+	if (IS_ERR(inst))
+		return PTR_ERR(inst);
+
+	spawn = aead_instance_ctx(inst);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	if (alg->base.cra_aead.encrypt)
+		goto done;
+
+	err = -EINVAL;
+	if (inst->alg.ivsize & (sizeof(u32) - 1) ||
+	    inst->alg.ivsize > MAX_IV_SIZE)
+		goto free_inst;
+
+	inst->alg.encrypt = echainiv_encrypt;
+	inst->alg.decrypt = echainiv_decrypt;
+
+	inst->alg.base.cra_init = echainiv_init;
+	inst->alg.base.cra_exit = echainiv_exit;
+
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_ctxsize = sizeof(struct echainiv_ctx);
+	inst->alg.base.cra_ctxsize += inst->alg.ivsize;
+
+done:
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
+
+out:
+	return err;
+
+free_inst:
+	aead_geniv_free(inst);
+	goto out;
+}
+
+static void echainiv_free(struct crypto_instance *inst)
+{
+	aead_geniv_free(aead_instance(inst));
+}
+
+static struct crypto_template echainiv_tmpl = {
+	.name = "echainiv",
+	.create = echainiv_aead_create,
+	.free = echainiv_free,
+	.module = THIS_MODULE,
+};
+
+static int __init echainiv_module_init(void)
+{
+	return crypto_register_template(&echainiv_tmpl);
+}
+
+static void __exit echainiv_module_exit(void)
+{
+	crypto_unregister_template(&echainiv_tmpl);
+}
+
+module_init(echainiv_module_init);
+module_exit(echainiv_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Encrypted Chain IV Generator");
+MODULE_ALIAS_CRYPTO("echainiv");

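echainiv is a geniv template: it wraps an existing AEAD, generates the IV from a per-cpu chained value xored with a salt, and lets the inner algorithm encrypt that IV along with the payload (hence the encrypt-then-authenticate restriction in the header comment). From a consumer's point of view, instantiation is just a name; a hedged sketch with an illustrative inner algorithm:

#include <crypto/aead.h>

/* any authenc-style AEAD whose ivsize is a multiple of 4 and at most
 * MAX_IV_SIZE (16) qualifies as the inner algorithm */
static struct crypto_aead *echainiv_alloc_sketch(void)
{
	return crypto_alloc_aead("echainiv(authenc(hmac(sha256),cbc(aes)))",
				 0, 0);
}
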
+ 12 - 40
crypto/eseqiv.c

@@ -146,35 +146,13 @@ out:
 	return err;
 }
 
-static int eseqiv_givencrypt_first(struct skcipher_givcrypt_request *req)
-{
-	struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
-	struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv);
-	int err = 0;
-
-	spin_lock_bh(&ctx->lock);
-	if (crypto_ablkcipher_crt(geniv)->givencrypt != eseqiv_givencrypt_first)
-		goto unlock;
-
-	crypto_ablkcipher_crt(geniv)->givencrypt = eseqiv_givencrypt;
-	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
-				   crypto_ablkcipher_ivsize(geniv));
-
-unlock:
-	spin_unlock_bh(&ctx->lock);
-
-	if (err)
-		return err;
-
-	return eseqiv_givencrypt(req);
-}
-
 static int eseqiv_init(struct crypto_tfm *tfm)
 {
 	struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm);
 	struct eseqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv);
 	unsigned long alignmask;
 	unsigned int reqsize;
+	int err;
 
 	spin_lock_init(&ctx->lock);
 
@@ -198,7 +176,15 @@ static int eseqiv_init(struct crypto_tfm *tfm)
 	tfm->crt_ablkcipher.reqsize = reqsize +
 				      sizeof(struct ablkcipher_request);
 
-	return skcipher_geniv_init(tfm);
+	err = 0;
+	if (!crypto_get_default_rng()) {
+		crypto_ablkcipher_crt(geniv)->givencrypt = eseqiv_givencrypt;
+		err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+					   crypto_ablkcipher_ivsize(geniv));
+		crypto_put_default_rng();
+	}
+
+	return err ?: skcipher_geniv_init(tfm);
 }
 
 static struct crypto_template eseqiv_tmpl;
@@ -208,20 +194,14 @@ static struct crypto_instance *eseqiv_alloc(struct rtattr **tb)
 	struct crypto_instance *inst;
 	int err;
 
-	err = crypto_get_default_rng();
-	if (err)
-		return ERR_PTR(err);
-
 	inst = skcipher_geniv_alloc(&eseqiv_tmpl, tb, 0, 0);
 	if (IS_ERR(inst))
-		goto put_rng;
+		goto out;
 
 	err = -EINVAL;
 	if (inst->alg.cra_ablkcipher.ivsize != inst->alg.cra_blocksize)
 		goto free_inst;
 
-	inst->alg.cra_ablkcipher.givencrypt = eseqiv_givencrypt_first;
-
 	inst->alg.cra_init = eseqiv_init;
 	inst->alg.cra_exit = skcipher_geniv_exit;
 
@@ -234,21 +214,13 @@ out:
 free_inst:
 	skcipher_geniv_free(inst);
 	inst = ERR_PTR(err);
-put_rng:
-	crypto_put_default_rng();
 	goto out;
 }
 
-static void eseqiv_free(struct crypto_instance *inst)
-{
-	skcipher_geniv_free(inst);
-	crypto_put_default_rng();
-}
-
 static struct crypto_template eseqiv_tmpl = {
 	.name = "eseqiv",
 	.alloc = eseqiv_alloc,
-	.free = eseqiv_free,
+	.free = skcipher_geniv_free,
 	.module = THIS_MODULE,
 };
 

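Two idioms in the hunk above are easy to misread, so here they are spelled out (illustrative only, using the same internal APIs as the file itself). First, "err ?: skcipher_geniv_init(tfm)" is GNU C's conditional with the middle operand omitted, equivalent to "err ? err : skcipher_geniv_init(tfm)", so a salt-generation failure short-circuits the geniv init. Second, when no default RNG is available the instance still allocates; it just never gains givencrypt:

#include <crypto/internal/skcipher.h>
#include <linux/crypto.h>
#include <linux/types.h>

/* expanded form of the new return statement */
static int seed_then_init(struct crypto_tfm *tfm, int err)
{
	if (err)
		return err;
	return skcipher_geniv_init(tfm);
}

/* true only when crypto_get_default_rng() succeeded during init */
static bool geniv_can_generate_ivs(struct crypto_ablkcipher *geniv)
{
	return crypto_ablkcipher_crt(geniv)->givencrypt != NULL;
}
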
+ 52 - 1
crypto/fips.c

@@ -10,7 +10,12 @@
  *
  */
 
-#include "internal.h"
+#include <linux/export.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sysctl.h>
 
 int fips_enabled;
 EXPORT_SYMBOL_GPL(fips_enabled);
@@ -25,3 +30,49 @@ static int fips_enable(char *str)
 }
 
 __setup("fips=", fips_enable);
+
+static struct ctl_table crypto_sysctl_table[] = {
+	{
+		.procname       = "fips_enabled",
+		.data           = &fips_enabled,
+		.maxlen         = sizeof(int),
+		.mode           = 0444,
+		.proc_handler   = proc_dointvec
+	},
+	{}
+};
+
+static struct ctl_table crypto_dir_table[] = {
+	{
+		.procname       = "crypto",
+		.mode           = 0555,
+		.child          = crypto_sysctl_table
+	},
+	{}
+};
+
+static struct ctl_table_header *crypto_sysctls;
+
+static void crypto_proc_fips_init(void)
+{
+	crypto_sysctls = register_sysctl_table(crypto_dir_table);
+}
+
+static void crypto_proc_fips_exit(void)
+{
+	unregister_sysctl_table(crypto_sysctls);
+}
+
+static int __init fips_init(void)
+{
+	crypto_proc_fips_init();
+	return 0;
+}
+
+static void __exit fips_exit(void)
+{
+	crypto_proc_fips_exit();
+}
+
+module_init(fips_init);
+module_exit(fips_exit);

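The observable result is that the boot-time flag, previously internal to the crypto layer, now shows up read-only at /proc/sys/crypto/fips_enabled. A userspace sketch:

#include <stdio.h>

int main(void)
{
	int fips = 0;
	FILE *f = fopen("/proc/sys/crypto/fips_enabled", "r");

	if (f) {
		if (fscanf(f, "%d", &fips) != 1)
			fips = 0;
		fclose(f);
	}

	printf("FIPS mode: %s\n", fips ? "enabled" : "disabled");
	return 0;
}
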
+ 385 - 555
crypto/gcm.c

@@ -12,6 +12,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/hash.h>
+#include <crypto/null.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/hash.h>
 #include "internal.h"
@@ -39,7 +40,6 @@ struct crypto_rfc4106_ctx {
 
 struct crypto_rfc4543_instance_ctx {
 	struct crypto_aead_spawn aead;
-	struct crypto_skcipher_spawn null;
 };
 
 struct crypto_rfc4543_ctx {
@@ -49,25 +49,22 @@ struct crypto_rfc4543_ctx {
 };
 
 struct crypto_rfc4543_req_ctx {
-	u8 auth_tag[16];
-	u8 assocbuf[32];
-	struct scatterlist cipher[1];
-	struct scatterlist payload[2];
-	struct scatterlist assoc[2];
 	struct aead_request subreq;
 };
 
 struct crypto_gcm_ghash_ctx {
 	unsigned int cryptlen;
 	struct scatterlist *src;
-	void (*complete)(struct aead_request *req, int err);
+	int (*complete)(struct aead_request *req, u32 flags);
 };
 
 struct crypto_gcm_req_priv_ctx {
+	u8 iv[16];
 	u8 auth_tag[16];
 	u8 iauth_tag[16];
-	struct scatterlist src[2];
-	struct scatterlist dst[2];
+	struct scatterlist src[3];
+	struct scatterlist dst[3];
+	struct scatterlist sg;
 	struct crypto_gcm_ghash_ctx ghash_ctx;
 	union {
 		struct ahash_request ahreq;
@@ -80,7 +77,12 @@ struct crypto_gcm_setkey_result {
 	struct completion completion;
 };
 
-static void *gcm_zeroes;
+static struct {
+	u8 buf[16];
+	struct scatterlist sg;
+} *gcm_zeroes;
+
+static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc);
 
 static inline struct crypto_gcm_req_priv_ctx *crypto_gcm_reqctx(
 	struct aead_request *req)
@@ -120,15 +122,13 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 
 	crypto_ablkcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
 	crypto_ablkcipher_set_flags(ctr, crypto_aead_get_flags(aead) &
-				   CRYPTO_TFM_REQ_MASK);
-
+					 CRYPTO_TFM_REQ_MASK);
 	err = crypto_ablkcipher_setkey(ctr, key, keylen);
+	crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctr) &
+				    CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
-	crypto_aead_set_flags(aead, crypto_ablkcipher_get_flags(ctr) &
-				       CRYPTO_TFM_RES_MASK);
-
 	data = kzalloc(sizeof(*data) + crypto_ablkcipher_reqsize(ctr),
 		       GFP_KERNEL);
 	if (!data)
@@ -163,7 +163,7 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 			      CRYPTO_TFM_RES_MASK);
 
 out:
-	kfree(data);
+	kzfree(data);
 	return err;
 }
 
@@ -186,35 +186,46 @@ static int crypto_gcm_setauthsize(struct crypto_aead *tfm,
 	return 0;
 }
 
-static void crypto_gcm_init_crypt(struct ablkcipher_request *ablk_req,
-				  struct aead_request *req,
-				  unsigned int cryptlen)
+static void crypto_gcm_init_common(struct aead_request *req)
 {
-	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct crypto_gcm_ctx *ctx = crypto_aead_ctx(aead);
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
-	struct scatterlist *dst;
 	__be32 counter = cpu_to_be32(1);
+	struct scatterlist *sg;
 
 	memset(pctx->auth_tag, 0, sizeof(pctx->auth_tag));
-	memcpy(req->iv + 12, &counter, 4);
+	memcpy(pctx->iv, req->iv, 12);
+	memcpy(pctx->iv + 12, &counter, 4);
 
-	sg_init_table(pctx->src, 2);
+	sg_init_table(pctx->src, 3);
 	sg_set_buf(pctx->src, pctx->auth_tag, sizeof(pctx->auth_tag));
-	scatterwalk_sg_chain(pctx->src, 2, req->src);
+	sg = scatterwalk_ffwd(pctx->src + 1, req->src, req->assoclen);
+	if (sg != pctx->src + 1)
+		scatterwalk_sg_chain(pctx->src, 2, sg);
 
-	dst = pctx->src;
 	if (req->src != req->dst) {
-		sg_init_table(pctx->dst, 2);
+		sg_init_table(pctx->dst, 3);
 		sg_set_buf(pctx->dst, pctx->auth_tag, sizeof(pctx->auth_tag));
-		scatterwalk_sg_chain(pctx->dst, 2, req->dst);
-		dst = pctx->dst;
+		sg = scatterwalk_ffwd(pctx->dst + 1, req->dst, req->assoclen);
+		if (sg != pctx->dst + 1)
+			scatterwalk_sg_chain(pctx->dst, 2, sg);
 	}
+}
+
+static void crypto_gcm_init_crypt(struct aead_request *req,
+				  unsigned int cryptlen)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
+	struct ablkcipher_request *ablk_req = &pctx->u.abreq;
+	struct scatterlist *dst;
+
+	dst = req->src == req->dst ? pctx->src : pctx->dst;
 
 	ablkcipher_request_set_tfm(ablk_req, ctx->ctr);
 	ablkcipher_request_set_crypt(ablk_req, pctx->src, dst,
 				     cryptlen + sizeof(pctx->auth_tag),
-				     req->iv);
+				     pctx->iv);
 }
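
Both init helpers reflect the new AEAD convention this series introduces: the associated data occupies the first assoclen bytes of req->src (and req->dst), so the code fast-forwards past it with scatterwalk_ffwd() and chains its private tag block in front. A simplified, non-kernel model of that fast-forward over an array of segments (types and names invented for illustration):

#include <stddef.h>

struct seg { const unsigned char *buf; size_t len; };

/* Skip 'skip' bytes in a segment array, returning the index of the
 * segment where the payload starts and the offset within it -- roughly
 * what scatterwalk_ffwd() does for scatterlists. */
static size_t seg_ffwd(const struct seg *sg, size_t nsegs,
		       size_t skip, size_t *off)
{
	size_t i;

	for (i = 0; i < nsegs && skip >= sg[i].len; i++)
		skip -= sg[i].len;
	*off = skip;
	return i;
}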
 
 static inline unsigned int gcm_remain(unsigned int len)
@@ -224,41 +235,31 @@ static inline unsigned int gcm_remain(unsigned int len)
 }
 
 static void gcm_hash_len_done(struct crypto_async_request *areq, int err);
-static void gcm_hash_final_done(struct crypto_async_request *areq, int err);
 
 static int gcm_hash_update(struct aead_request *req,
-			   struct crypto_gcm_req_priv_ctx *pctx,
 			   crypto_completion_t compl,
 			   struct scatterlist *src,
-			   unsigned int len)
+			   unsigned int len, u32 flags)
 {
+	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct ahash_request *ahreq = &pctx->u.ahreq;
 
-	ahash_request_set_callback(ahreq, aead_request_flags(req),
-				   compl, req);
+	ahash_request_set_callback(ahreq, flags, compl, req);
 	ahash_request_set_crypt(ahreq, src, NULL, len);
 
 	return crypto_ahash_update(ahreq);
 }
 
 static int gcm_hash_remain(struct aead_request *req,
-			   struct crypto_gcm_req_priv_ctx *pctx,
 			   unsigned int remain,
-			   crypto_completion_t compl)
+			   crypto_completion_t compl, u32 flags)
 {
-	struct ahash_request *ahreq = &pctx->u.ahreq;
-
-	ahash_request_set_callback(ahreq, aead_request_flags(req),
-				   compl, req);
-	sg_init_one(pctx->src, gcm_zeroes, remain);
-	ahash_request_set_crypt(ahreq, pctx->src, NULL, remain);
-
-	return crypto_ahash_update(ahreq);
+	return gcm_hash_update(req, compl, &gcm_zeroes->sg, remain, flags);
 }
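
GHASH operates on 16-byte blocks, so partial updates are padded by hashing gcm_remain(len) bytes out of the shared zero buffer; with the new static scatterlist over gcm_zeroes->buf, gcm_hash_remain() collapses to a plain gcm_hash_update() call. The body of gcm_remain() is outside this hunk, but presumably it just computes the distance to the next 16-byte boundary, along the lines of:

/* Distance from len to the next 16-byte boundary (0 if aligned). */
static unsigned int remain16(unsigned int len)
{
	len &= 0xf;
	return len ? 16 - len : 0;
}
/* remain16(20) == 12, remain16(32) == 0 */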
 
-static int gcm_hash_len(struct aead_request *req,
-			struct crypto_gcm_req_priv_ctx *pctx)
+static int gcm_hash_len(struct aead_request *req, u32 flags)
 {
+	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct ahash_request *ahreq = &pctx->u.ahreq;
 	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
 	u128 lengths;
@@ -266,76 +267,41 @@ static int gcm_hash_len(struct aead_request *req,
 	lengths.a = cpu_to_be64(req->assoclen * 8);
 	lengths.b = cpu_to_be64(gctx->cryptlen * 8);
 	memcpy(pctx->iauth_tag, &lengths, 16);
-	sg_init_one(pctx->src, pctx->iauth_tag, 16);
-	ahash_request_set_callback(ahreq, aead_request_flags(req),
-				   gcm_hash_len_done, req);
-	ahash_request_set_crypt(ahreq, pctx->src,
-				NULL, sizeof(lengths));
+	sg_init_one(&pctx->sg, pctx->iauth_tag, 16);
+	ahash_request_set_callback(ahreq, flags, gcm_hash_len_done, req);
+	ahash_request_set_crypt(ahreq, &pctx->sg,
+				pctx->iauth_tag, sizeof(lengths));
 
-	return crypto_ahash_update(ahreq);
-}
-
-static int gcm_hash_final(struct aead_request *req,
-			  struct crypto_gcm_req_priv_ctx *pctx)
-{
-	struct ahash_request *ahreq = &pctx->u.ahreq;
-
-	ahash_request_set_callback(ahreq, aead_request_flags(req),
-				   gcm_hash_final_done, req);
-	ahash_request_set_crypt(ahreq, NULL, pctx->iauth_tag, 0);
-
-	return crypto_ahash_final(ahreq);
+	return crypto_ahash_finup(ahreq);
 }
 
-static void __gcm_hash_final_done(struct aead_request *req, int err)
+static int gcm_hash_len_continue(struct aead_request *req, u32 flags)
 {
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
 
-	if (!err)
-		crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16);
-
-	gctx->complete(req, err);
+	return gctx->complete(req, flags);
 }
 
-static void gcm_hash_final_done(struct crypto_async_request *areq, int err)
+static void gcm_hash_len_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
 
-	__gcm_hash_final_done(req, err);
-}
-
-static void __gcm_hash_len_done(struct aead_request *req, int err)
-{
-	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
-
-	if (!err) {
-		err = gcm_hash_final(req, pctx);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-	}
-
-	__gcm_hash_final_done(req, err);
-}
+	if (err)
+		goto out;
 
-static void gcm_hash_len_done(struct crypto_async_request *areq, int err)
-{
-	struct aead_request *req = areq->data;
+	err = gcm_hash_len_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
 
-	__gcm_hash_len_done(req, err);
+out:
+	aead_request_complete(req, err);
 }
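
This function is the template for the whole rewrite: every asynchronous stage now comes as a synchronous *_continue() helper plus a *_done() callback that merely re-enters it. On any error the request is completed; -EINPROGRESS means a later callback will resume the chain. A minimal sketch of the idiom outside the crypto API (all names invented):

#include <errno.h>

struct req { void (*complete)(struct req *req, int err); };

/* Next stage, run inline: returns 0 on synchronous completion,
 * -EINPROGRESS if it went asynchronous, or a negative error. */
static int stage_continue(struct req *req, unsigned int flags)
{
	(void)req; (void)flags;
	return 0;
}

/* Async callback: on success re-enter the continuation; if that went
 * async again, wait for the next callback instead of completing. */
static void stage_done(struct req *req, int err)
{
	if (err)
		goto out;

	err = stage_continue(req, 0);
	if (err == -EINPROGRESS)
		return;
out:
	req->complete(req, err);
}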
 
-static void __gcm_hash_crypt_remain_done(struct aead_request *req, int err)
+static int gcm_hash_crypt_remain_continue(struct aead_request *req, u32 flags)
 {
-	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
-
-	if (!err) {
-		err = gcm_hash_len(req, pctx);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-	}
-
-	__gcm_hash_len_done(req, err);
+	return gcm_hash_len(req, flags) ?:
+	       gcm_hash_len_continue(req, flags);
 }
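
The `a ?: b` construct used throughout these helpers is the GNU C "Elvis" extension (supported by GCC and Clang): it evaluates `a` once and yields it if non-zero, otherwise `b`. Here it reads as "start the step; if it finished synchronously (returned 0), run its continuation, otherwise propagate the error or -EINPROGRESS". A standalone demonstration:

#include <assert.h>

static int step_a_result;
static int step_a(void) { return step_a_result; }
static int step_b(void) { return 42; }

int main(void)
{
	step_a_result = 0;          /* synchronous success ... */
	assert((step_a() ?: step_b()) == 42);   /* ... falls through */

	step_a_result = -115;       /* -EINPROGRESS-style return ... */
	assert((step_a() ?: step_b()) == -115); /* ... short-circuits */
	return 0;
}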
 
 static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq,
@@ -343,55 +309,58 @@ static void gcm_hash_crypt_remain_done(struct crypto_async_request *areq,
 {
 	struct aead_request *req = areq->data;
 
-	__gcm_hash_crypt_remain_done(req, err);
+	if (err)
+		goto out;
+
+	err = gcm_hash_crypt_remain_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
+
+out:
+	aead_request_complete(req, err);
 }
 
-static void __gcm_hash_crypt_done(struct aead_request *req, int err)
+static int gcm_hash_crypt_continue(struct aead_request *req, u32 flags)
 {
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
 	unsigned int remain;
 
-	if (!err) {
-		remain = gcm_remain(gctx->cryptlen);
-		BUG_ON(!remain);
-		err = gcm_hash_remain(req, pctx, remain,
-				      gcm_hash_crypt_remain_done);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-	}
+	remain = gcm_remain(gctx->cryptlen);
+	if (remain)
+		return gcm_hash_remain(req, remain,
+				       gcm_hash_crypt_remain_done, flags) ?:
+		       gcm_hash_crypt_remain_continue(req, flags);
 
-	__gcm_hash_crypt_remain_done(req, err);
+	return gcm_hash_crypt_remain_continue(req, flags);
 }
 
 static void gcm_hash_crypt_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
 
-	__gcm_hash_crypt_done(req, err);
+	if (err)
+		goto out;
+
+	err = gcm_hash_crypt_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
+
+out:
+	aead_request_complete(req, err);
 }
 
-static void __gcm_hash_assoc_remain_done(struct aead_request *req, int err)
+static int gcm_hash_assoc_remain_continue(struct aead_request *req, u32 flags)
 {
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
-	crypto_completion_t compl;
-	unsigned int remain = 0;
-
-	if (!err && gctx->cryptlen) {
-		remain = gcm_remain(gctx->cryptlen);
-		compl = remain ? gcm_hash_crypt_done :
-			gcm_hash_crypt_remain_done;
-		err = gcm_hash_update(req, pctx, compl,
-				      gctx->src, gctx->cryptlen);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-	}
 
-	if (remain)
-		__gcm_hash_crypt_done(req, err);
-	else
-		__gcm_hash_crypt_remain_done(req, err);
+	if (gctx->cryptlen)
+		return gcm_hash_update(req, gcm_hash_crypt_done,
+				       gctx->src, gctx->cryptlen, flags) ?:
+		       gcm_hash_crypt_continue(req, flags);
+
+	return gcm_hash_crypt_remain_continue(req, flags);
 }
 
 static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq,
@@ -399,146 +368,120 @@ static void gcm_hash_assoc_remain_done(struct crypto_async_request *areq,
 {
 	struct aead_request *req = areq->data;
 
-	__gcm_hash_assoc_remain_done(req, err);
+	if (err)
+		goto out;
+
+	err = gcm_hash_assoc_remain_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
+
+out:
+	aead_request_complete(req, err);
 }
 
-static void __gcm_hash_assoc_done(struct aead_request *req, int err)
+static int gcm_hash_assoc_continue(struct aead_request *req, u32 flags)
 {
-	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	unsigned int remain;
 
-	if (!err) {
-		remain = gcm_remain(req->assoclen);
-		BUG_ON(!remain);
-		err = gcm_hash_remain(req, pctx, remain,
-				      gcm_hash_assoc_remain_done);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-	}
+	remain = gcm_remain(req->assoclen);
+	if (remain)
+		return gcm_hash_remain(req, remain,
+				       gcm_hash_assoc_remain_done, flags) ?:
+		       gcm_hash_assoc_remain_continue(req, flags);
 
-	__gcm_hash_assoc_remain_done(req, err);
+	return gcm_hash_assoc_remain_continue(req, flags);
 }
 
 static void gcm_hash_assoc_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
 
-	__gcm_hash_assoc_done(req, err);
+	if (err)
+		goto out;
+
+	err = gcm_hash_assoc_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
+
+out:
+	aead_request_complete(req, err);
 }
 
-static void __gcm_hash_init_done(struct aead_request *req, int err)
+static int gcm_hash_init_continue(struct aead_request *req, u32 flags)
 {
-	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
-	crypto_completion_t compl;
-	unsigned int remain = 0;
-
-	if (!err && req->assoclen) {
-		remain = gcm_remain(req->assoclen);
-		compl = remain ? gcm_hash_assoc_done :
-			gcm_hash_assoc_remain_done;
-		err = gcm_hash_update(req, pctx, compl,
-				      req->assoc, req->assoclen);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-	}
+	if (req->assoclen)
+		return gcm_hash_update(req, gcm_hash_assoc_done,
+				       req->src, req->assoclen, flags) ?:
+		       gcm_hash_assoc_continue(req, flags);
 
-	if (remain)
-		__gcm_hash_assoc_done(req, err);
-	else
-		__gcm_hash_assoc_remain_done(req, err);
+	return gcm_hash_assoc_remain_continue(req, flags);
 }
 
 static void gcm_hash_init_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
 
-	__gcm_hash_init_done(req, err);
+	if (err)
+		goto out;
+
+	err = gcm_hash_init_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
+
+out:
+	aead_request_complete(req, err);
 }
 
-static int gcm_hash(struct aead_request *req,
-		    struct crypto_gcm_req_priv_ctx *pctx)
+static int gcm_hash(struct aead_request *req, u32 flags)
 {
+	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct ahash_request *ahreq = &pctx->u.ahreq;
-	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
-	struct crypto_gcm_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	unsigned int remain;
-	crypto_completion_t compl;
-	int err;
+	struct crypto_gcm_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 
 	ahash_request_set_tfm(ahreq, ctx->ghash);
 
-	ahash_request_set_callback(ahreq, aead_request_flags(req),
-				   gcm_hash_init_done, req);
-	err = crypto_ahash_init(ahreq);
-	if (err)
-		return err;
-	remain = gcm_remain(req->assoclen);
-	compl = remain ? gcm_hash_assoc_done : gcm_hash_assoc_remain_done;
-	err = gcm_hash_update(req, pctx, compl, req->assoc, req->assoclen);
-	if (err)
-		return err;
-	if (remain) {
-		err = gcm_hash_remain(req, pctx, remain,
-				      gcm_hash_assoc_remain_done);
-		if (err)
-			return err;
-	}
-	remain = gcm_remain(gctx->cryptlen);
-	compl = remain ? gcm_hash_crypt_done : gcm_hash_crypt_remain_done;
-	err = gcm_hash_update(req, pctx, compl, gctx->src, gctx->cryptlen);
-	if (err)
-		return err;
-	if (remain) {
-		err = gcm_hash_remain(req, pctx, remain,
-				      gcm_hash_crypt_remain_done);
-		if (err)
-			return err;
-	}
-	err = gcm_hash_len(req, pctx);
-	if (err)
-		return err;
-	err = gcm_hash_final(req, pctx);
-	if (err)
-		return err;
-
-	return 0;
+	ahash_request_set_callback(ahreq, flags, gcm_hash_init_done, req);
+	return crypto_ahash_init(ahreq) ?:
+	       gcm_hash_init_continue(req, flags);
 }
 
-static void gcm_enc_copy_hash(struct aead_request *req,
-			      struct crypto_gcm_req_priv_ctx *pctx)
+static int gcm_enc_copy_hash(struct aead_request *req, u32 flags)
 {
+	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	u8 *auth_tag = pctx->auth_tag;
 
-	scatterwalk_map_and_copy(auth_tag, req->dst, req->cryptlen,
+	crypto_xor(auth_tag, pctx->iauth_tag, 16);
+	scatterwalk_map_and_copy(auth_tag, req->dst,
+				 req->assoclen + req->cryptlen,
 				 crypto_aead_authsize(aead), 1);
+	return 0;
 }
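
The 16 zero bytes that crypto_gcm_init_common() prepends via pctx->auth_tag are what makes this work: the CTR pass starts at counter value 1 (J0), so after encryption auth_tag holds E_K(J0), the block that masks the GHASH value in GCM's tag computation T = E_K(J0) XOR GHASH(A, C). gcm_enc_copy_hash() performs that XOR and stores the (possibly truncated) tag right behind the AD and ciphertext. In isolation the tag assembly looks like (standalone sketch, not the kernel helper):

#include <stdint.h>
#include <string.h>

/* XOR the GHASH output into the encrypted counter-0 block to form the
 * GCM tag, then truncate to the negotiated authsize (<= 16). */
static void gcm_make_tag(uint8_t tag[16], const uint8_t ek_j0[16],
			 const uint8_t ghash[16], unsigned int authsize,
			 uint8_t *out)
{
	for (int i = 0; i < 16; i++)
		tag[i] = ek_j0[i] ^ ghash[i];
	memcpy(out, tag, authsize);
}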
 
-static void gcm_enc_hash_done(struct aead_request *req, int err)
+static int gcm_encrypt_continue(struct aead_request *req, u32 flags)
 {
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
+	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
 
-	if (!err)
-		gcm_enc_copy_hash(req, pctx);
+	gctx->src = sg_next(req->src == req->dst ? pctx->src : pctx->dst);
+	gctx->cryptlen = req->cryptlen;
+	gctx->complete = gcm_enc_copy_hash;
 
-	aead_request_complete(req, err);
+	return gcm_hash(req, flags);
 }
 
 static void gcm_encrypt_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
-	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 
-	if (!err) {
-		err = gcm_hash(req, pctx);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-		else if (!err) {
-			crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16);
-			gcm_enc_copy_hash(req, pctx);
-		}
-	}
+	if (err)
+		goto out;
+
+	err = gcm_encrypt_continue(req, 0);
+	if (err == -EINPROGRESS)
+		return;
 
+out:
 	aead_request_complete(req, err);
 }
 
@@ -546,34 +489,19 @@ static int crypto_gcm_encrypt(struct aead_request *req)
 {
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct ablkcipher_request *abreq = &pctx->u.abreq;
-	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
-	int err;
-
-	crypto_gcm_init_crypt(abreq, req, req->cryptlen);
-	ablkcipher_request_set_callback(abreq, aead_request_flags(req),
-					gcm_encrypt_done, req);
-
-	gctx->src = req->dst;
-	gctx->cryptlen = req->cryptlen;
-	gctx->complete = gcm_enc_hash_done;
-
-	err = crypto_ablkcipher_encrypt(abreq);
-	if (err)
-		return err;
-
-	err = gcm_hash(req, pctx);
-	if (err)
-		return err;
+	u32 flags = aead_request_flags(req);
 
-	crypto_xor(pctx->auth_tag, pctx->iauth_tag, 16);
-	gcm_enc_copy_hash(req, pctx);
+	crypto_gcm_init_common(req);
+	crypto_gcm_init_crypt(req, req->cryptlen);
+	ablkcipher_request_set_callback(abreq, flags, gcm_encrypt_done, req);
 
-	return 0;
+	return crypto_ablkcipher_encrypt(abreq) ?:
+	       gcm_encrypt_continue(req, flags);
 }
 
-static int crypto_gcm_verify(struct aead_request *req,
-			     struct crypto_gcm_req_priv_ctx *pctx)
+static int crypto_gcm_verify(struct aead_request *req)
 {
+	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	u8 *auth_tag = pctx->auth_tag;
 	u8 *iauth_tag = pctx->iauth_tag;
@@ -581,78 +509,57 @@ static int crypto_gcm_verify(struct aead_request *req,
 	unsigned int cryptlen = req->cryptlen - authsize;
 
 	crypto_xor(auth_tag, iauth_tag, 16);
-	scatterwalk_map_and_copy(iauth_tag, req->src, cryptlen, authsize, 0);
+	scatterwalk_map_and_copy(iauth_tag, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
 	return crypto_memneq(iauth_tag, auth_tag, authsize) ? -EBADMSG : 0;
 }
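
crypto_memneq() is used instead of memcmp() so the comparison takes the same time whether the tags differ in the first byte or the last, closing a classic tag-forgery timing oracle. A userspace equivalent:

#include <stddef.h>

/* Constant-time inequality test: accumulate differences instead of
 * returning at the first mismatch, as memcmp() would. */
static int ct_memneq(const void *a, const void *b, size_t n)
{
	const unsigned char *pa = a, *pb = b;
	unsigned char diff = 0;

	while (n--)
		diff |= *pa++ ^ *pb++;
	return diff != 0;
}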
 
 static void gcm_decrypt_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
-	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 
 	if (!err)
-		err = crypto_gcm_verify(req, pctx);
+		err = crypto_gcm_verify(req);
 
 	aead_request_complete(req, err);
 }
 
-static void gcm_dec_hash_done(struct aead_request *req, int err)
+static int gcm_dec_hash_continue(struct aead_request *req, u32 flags)
 {
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
 	struct ablkcipher_request *abreq = &pctx->u.abreq;
 	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
 
-	if (!err) {
-		ablkcipher_request_set_callback(abreq, aead_request_flags(req),
-						gcm_decrypt_done, req);
-		crypto_gcm_init_crypt(abreq, req, gctx->cryptlen);
-		err = crypto_ablkcipher_decrypt(abreq);
-		if (err == -EINPROGRESS || err == -EBUSY)
-			return;
-		else if (!err)
-			err = crypto_gcm_verify(req, pctx);
-	}
-
-	aead_request_complete(req, err);
+	crypto_gcm_init_crypt(req, gctx->cryptlen);
+	ablkcipher_request_set_callback(abreq, flags, gcm_decrypt_done, req);
+	return crypto_ablkcipher_decrypt(abreq) ?: crypto_gcm_verify(req);
 }
 
 static int crypto_gcm_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_gcm_req_priv_ctx *pctx = crypto_gcm_reqctx(req);
-	struct ablkcipher_request *abreq = &pctx->u.abreq;
 	struct crypto_gcm_ghash_ctx *gctx = &pctx->ghash_ctx;
 	unsigned int authsize = crypto_aead_authsize(aead);
 	unsigned int cryptlen = req->cryptlen;
-	int err;
+	u32 flags = aead_request_flags(req);
 
-	if (cryptlen < authsize)
-		return -EINVAL;
 	cryptlen -= authsize;
 
-	gctx->src = req->src;
-	gctx->cryptlen = cryptlen;
-	gctx->complete = gcm_dec_hash_done;
-
-	err = gcm_hash(req, pctx);
-	if (err)
-		return err;
+	crypto_gcm_init_common(req);
 
-	ablkcipher_request_set_callback(abreq, aead_request_flags(req),
-					gcm_decrypt_done, req);
-	crypto_gcm_init_crypt(abreq, req, cryptlen);
-	err = crypto_ablkcipher_decrypt(abreq);
-	if (err)
-		return err;
+	gctx->src = sg_next(pctx->src);
+	gctx->cryptlen = cryptlen;
+	gctx->complete = gcm_dec_hash_continue;
 
-	return crypto_gcm_verify(req, pctx);
+	return gcm_hash(req, flags);
 }
 
-static int crypto_gcm_init_tfm(struct crypto_tfm *tfm)
+static int crypto_gcm_init_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct gcm_instance_ctx *ictx = crypto_instance_ctx(inst);
-	struct crypto_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aead_instance *inst = aead_alg_instance(tfm);
+	struct gcm_instance_ctx *ictx = aead_instance_ctx(inst);
+	struct crypto_gcm_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_ablkcipher *ctr;
 	struct crypto_ahash *ghash;
 	unsigned long align;
@@ -670,14 +577,14 @@ static int crypto_gcm_init_tfm(struct crypto_tfm *tfm)
 	ctx->ctr = ctr;
 	ctx->ghash = ghash;
 
-	align = crypto_tfm_alg_alignmask(tfm);
+	align = crypto_aead_alignmask(tfm);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = align +
-		offsetof(struct crypto_gcm_req_priv_ctx, u) +
+	crypto_aead_set_reqsize(tfm,
+		align + offsetof(struct crypto_gcm_req_priv_ctx, u) +
 		max(sizeof(struct ablkcipher_request) +
 		    crypto_ablkcipher_reqsize(ctr),
 		    sizeof(struct ahash_request) +
-		    crypto_ahash_reqsize(ghash));
+		    crypto_ahash_reqsize(ghash)));
 
 	return 0;
 
@@ -686,53 +593,59 @@ err_free_hash:
 	return err;
 }
 
-static void crypto_gcm_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_gcm_exit_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_gcm_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_ahash(ctx->ghash);
 	crypto_free_ablkcipher(ctx->ctr);
 }
 
-static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb,
-						       const char *full_name,
-						       const char *ctr_name,
-						       const char *ghash_name)
+static int crypto_gcm_create_common(struct crypto_template *tmpl,
+				    struct rtattr **tb,
+				    const char *full_name,
+				    const char *ctr_name,
+				    const char *ghash_name)
 {
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
+	struct aead_instance *inst;
 	struct crypto_alg *ctr;
 	struct crypto_alg *ghash_alg;
-	struct ahash_alg *ghash_ahash_alg;
+	struct hash_alg_common *ghash;
 	struct gcm_instance_ctx *ctx;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
-		return ERR_CAST(algt);
+		return PTR_ERR(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
 				    CRYPTO_ALG_TYPE_HASH,
 				    CRYPTO_ALG_TYPE_AHASH_MASK);
 	if (IS_ERR(ghash_alg))
-		return ERR_CAST(ghash_alg);
+		return PTR_ERR(ghash_alg);
+
+	ghash = __crypto_hash_alg_common(ghash_alg);
 
 	err = -ENOMEM;
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
 	if (!inst)
 		goto out_put_ghash;
 
-	ctx = crypto_instance_ctx(inst);
-	ghash_ahash_alg = container_of(ghash_alg, struct ahash_alg, halg.base);
-	err = crypto_init_ahash_spawn(&ctx->ghash, &ghash_ahash_alg->halg,
-				      inst);
+	ctx = aead_instance_ctx(inst);
+	err = crypto_init_ahash_spawn(&ctx->ghash, ghash,
+				      aead_crypto_instance(inst));
 	if (err)
 		goto err_free_inst;
 
-	crypto_set_skcipher_spawn(&ctx->ctr, inst);
+	err = -EINVAL;
+	if (ghash->digestsize != 16)
+		goto err_drop_ghash;
+
+	crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst));
 	err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0,
 				   crypto_requires_sync(algt->type,
 							algt->mask));
@@ -751,33 +664,38 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb,
 		goto out_put_ctr;
 
 	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "gcm_base(%s,%s)", ctr->cra_driver_name,
 		     ghash_alg->cra_driver_name) >=
 	    CRYPTO_MAX_ALG_NAME)
 		goto out_put_ctr;
 
-	memcpy(inst->alg.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
-
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
-	inst->alg.cra_flags |= ctr->cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.cra_priority = ctr->cra_priority;
-	inst->alg.cra_blocksize = 1;
-	inst->alg.cra_alignmask = ctr->cra_alignmask | (__alignof__(u64) - 1);
-	inst->alg.cra_type = &crypto_aead_type;
-	inst->alg.cra_aead.ivsize = 16;
-	inst->alg.cra_aead.maxauthsize = 16;
-	inst->alg.cra_ctxsize = sizeof(struct crypto_gcm_ctx);
-	inst->alg.cra_init = crypto_gcm_init_tfm;
-	inst->alg.cra_exit = crypto_gcm_exit_tfm;
-	inst->alg.cra_aead.setkey = crypto_gcm_setkey;
-	inst->alg.cra_aead.setauthsize = crypto_gcm_setauthsize;
-	inst->alg.cra_aead.encrypt = crypto_gcm_encrypt;
-	inst->alg.cra_aead.decrypt = crypto_gcm_decrypt;
+	memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
+
+	inst->alg.base.cra_flags = (ghash->base.cra_flags | ctr->cra_flags) &
+				   CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = (ghash->base.cra_priority +
+				       ctr->cra_priority) / 2;
+	inst->alg.base.cra_blocksize = 1;
+	inst->alg.base.cra_alignmask = ghash->base.cra_alignmask |
+				       ctr->cra_alignmask;
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_gcm_ctx);
+	inst->alg.ivsize = 12;
+	inst->alg.maxauthsize = 16;
+	inst->alg.init = crypto_gcm_init_tfm;
+	inst->alg.exit = crypto_gcm_exit_tfm;
+	inst->alg.setkey = crypto_gcm_setkey;
+	inst->alg.setauthsize = crypto_gcm_setauthsize;
+	inst->alg.encrypt = crypto_gcm_encrypt;
+	inst->alg.decrypt = crypto_gcm_decrypt;
+
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto out_put_ctr;
 
-out:
+out_put_ghash:
 	crypto_mod_put(ghash_alg);
-	return inst;
+	return err;
 
 out_put_ctr:
 	crypto_drop_skcipher(&ctx->ctr);
@@ -785,12 +703,10 @@ err_drop_ghash:
 	crypto_drop_ahash(&ctx->ghash);
 err_free_inst:
 	kfree(inst);
-out_put_ghash:
-	inst = ERR_PTR(err);
-	goto out;
+	goto out_put_ghash;
 }
 
-static struct crypto_instance *crypto_gcm_alloc(struct rtattr **tb)
+static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	const char *cipher_name;
 	char ctr_name[CRYPTO_MAX_ALG_NAME];
@@ -798,17 +714,18 @@ static struct crypto_instance *crypto_gcm_alloc(struct rtattr **tb)
 
 	cipher_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(cipher_name))
-		return ERR_CAST(cipher_name);
+		return PTR_ERR(cipher_name);
 
 	if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", cipher_name) >=
 	    CRYPTO_MAX_ALG_NAME)
-		return ERR_PTR(-ENAMETOOLONG);
+		return -ENAMETOOLONG;
 
 	if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm(%s)", cipher_name) >=
 	    CRYPTO_MAX_ALG_NAME)
-		return ERR_PTR(-ENAMETOOLONG);
+		return -ENAMETOOLONG;
 
-	return crypto_gcm_alloc_common(tb, full_name, ctr_name, "ghash");
+	return crypto_gcm_create_common(tmpl, tb, full_name,
+					ctr_name, "ghash");
 }
 
 static void crypto_gcm_free(struct crypto_instance *inst)
@@ -817,17 +734,18 @@ static void crypto_gcm_free(struct crypto_instance *inst)
 
 	crypto_drop_skcipher(&ctx->ctr);
 	crypto_drop_ahash(&ctx->ghash);
-	kfree(inst);
+	kfree(aead_instance(inst));
 }
 
 static struct crypto_template crypto_gcm_tmpl = {
 	.name = "gcm",
-	.alloc = crypto_gcm_alloc,
+	.create = crypto_gcm_create,
 	.free = crypto_gcm_free,
 	.module = THIS_MODULE,
 };
 
-static struct crypto_instance *crypto_gcm_base_alloc(struct rtattr **tb)
+static int crypto_gcm_base_create(struct crypto_template *tmpl,
+				  struct rtattr **tb)
 {
 	const char *ctr_name;
 	const char *ghash_name;
@@ -835,22 +753,23 @@ static struct crypto_instance *crypto_gcm_base_alloc(struct rtattr **tb)
 
 	ctr_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(ctr_name))
-		return ERR_CAST(ctr_name);
+		return PTR_ERR(ctr_name);
 
 	ghash_name = crypto_attr_alg_name(tb[2]);
 	if (IS_ERR(ghash_name))
-		return ERR_CAST(ghash_name);
+		return PTR_ERR(ghash_name);
 
 	if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)",
 		     ctr_name, ghash_name) >= CRYPTO_MAX_ALG_NAME)
-		return ERR_PTR(-ENAMETOOLONG);
+		return -ENAMETOOLONG;
 
-	return crypto_gcm_alloc_common(tb, full_name, ctr_name, ghash_name);
+	return crypto_gcm_create_common(tmpl, tb, full_name,
+					ctr_name, ghash_name);
 }
 
 static struct crypto_template crypto_gcm_base_tmpl = {
 	.name = "gcm_base",
-	.alloc = crypto_gcm_base_alloc,
+	.create = crypto_gcm_base_create,
 	.free = crypto_gcm_free,
 	.module = THIS_MODULE,
 };
@@ -911,7 +830,7 @@ static struct aead_request *crypto_rfc4106_crypt(struct aead_request *req)
 	aead_request_set_callback(subreq, req->base.flags, req->base.complete,
 				  req->base.data);
 	aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, iv);
-	aead_request_set_assoc(subreq, req->assoc, req->assoclen);
+	aead_request_set_ad(subreq, req->assoclen);
 
 	return subreq;
 }
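
aead_request_set_assoc() has no replacement argument because there is nothing left to point at: under the new interface the AD is simply the first assoclen bytes of the src/dst scatterlists, and only its length is communicated. For illustration only (plain buffers instead of scatterlists, helper name invented), a caller now arranges its data as:

#include <string.h>

/* Lay out an AEAD request buffer under the one-SG-list convention:
 * associated data first, then the plain/ciphertext. */
static size_t aead_layout(unsigned char *buf,
			  const unsigned char *ad, size_t assoclen,
			  const unsigned char *text, size_t cryptlen)
{
	memcpy(buf, ad, assoclen);
	memcpy(buf + assoclen, text, cryptlen);
	return assoclen + cryptlen;
}

On encryption the tag is then written at offset assoclen + cryptlen of the destination, as the gcm_enc_copy_hash() hunk above shows.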
@@ -930,11 +849,11 @@ static int crypto_rfc4106_decrypt(struct aead_request *req)
 	return crypto_aead_decrypt(req);
 }
 
-static int crypto_rfc4106_init_tfm(struct crypto_tfm *tfm)
+static int crypto_rfc4106_init_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_aead_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_rfc4106_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aead_instance *inst = aead_alg_instance(tfm);
+	struct crypto_aead_spawn *spawn = aead_instance_ctx(inst);
+	struct crypto_rfc4106_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *aead;
 	unsigned long align;
 
@@ -946,126 +865,120 @@ static int crypto_rfc4106_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_aead_alignmask(aead);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = sizeof(struct aead_request) +
-				ALIGN(crypto_aead_reqsize(aead),
-				      crypto_tfm_ctx_alignment()) +
-				align + 16;
+	crypto_aead_set_reqsize(
+		tfm,
+		sizeof(struct aead_request) +
+		ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
+		align + 12);
 
 	return 0;
 }
 
-static void crypto_rfc4106_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_rfc4106_exit_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_rfc4106_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_rfc4106_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
 }
 
-static struct crypto_instance *crypto_rfc4106_alloc(struct rtattr **tb)
+static int crypto_rfc4106_create(struct crypto_template *tmpl,
+				 struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
+	struct aead_instance *inst;
 	struct crypto_aead_spawn *spawn;
-	struct crypto_alg *alg;
+	struct aead_alg *alg;
 	const char *ccm_name;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
-		return ERR_CAST(algt);
+		return PTR_ERR(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	ccm_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(ccm_name))
-		return ERR_CAST(ccm_name);
+		return PTR_ERR(ccm_name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
 	if (!inst)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	spawn = crypto_instance_ctx(inst);
-	crypto_set_aead_spawn(spawn, inst);
+	spawn = aead_instance_ctx(inst);
+	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
 	err = crypto_grab_aead(spawn, ccm_name, 0,
 			       crypto_requires_sync(algt->type, algt->mask));
 	if (err)
 		goto out_free_inst;
 
-	alg = crypto_aead_spawn_alg(spawn);
+	alg = crypto_spawn_aead_alg(spawn);
 
 	err = -EINVAL;
 
-	/* We only support 16-byte blocks. */
-	if (alg->cra_aead.ivsize != 16)
+	/* Underlying IV size must be 12. */
+	if (crypto_aead_alg_ivsize(alg) != 12)
 		goto out_drop_alg;
 
 	/* Not a stream cipher? */
-	if (alg->cra_blocksize != 1)
+	if (alg->base.cra_blocksize != 1)
 		goto out_drop_alg;
 
 	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
-		     "rfc4106(%s)", alg->cra_name) >= CRYPTO_MAX_ALG_NAME ||
-	    snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		     "rfc4106(%s)", alg->cra_driver_name) >=
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "rfc4106(%s)", alg->base.cra_name) >=
+	    CRYPTO_MAX_ALG_NAME ||
+	    snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "rfc4106(%s)", alg->base.cra_driver_name) >=
 	    CRYPTO_MAX_ALG_NAME)
 		goto out_drop_alg;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
-	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = 1;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_nivaead_type;
+	inst->alg.base.cra_flags |= alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = 1;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-	inst->alg.cra_aead.ivsize = 8;
-	inst->alg.cra_aead.maxauthsize = 16;
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4106_ctx);
 
-	inst->alg.cra_ctxsize = sizeof(struct crypto_rfc4106_ctx);
+	inst->alg.ivsize = 8;
+	inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);
 
-	inst->alg.cra_init = crypto_rfc4106_init_tfm;
-	inst->alg.cra_exit = crypto_rfc4106_exit_tfm;
+	inst->alg.init = crypto_rfc4106_init_tfm;
+	inst->alg.exit = crypto_rfc4106_exit_tfm;
 
-	inst->alg.cra_aead.setkey = crypto_rfc4106_setkey;
-	inst->alg.cra_aead.setauthsize = crypto_rfc4106_setauthsize;
-	inst->alg.cra_aead.encrypt = crypto_rfc4106_encrypt;
-	inst->alg.cra_aead.decrypt = crypto_rfc4106_decrypt;
+	inst->alg.setkey = crypto_rfc4106_setkey;
+	inst->alg.setauthsize = crypto_rfc4106_setauthsize;
+	inst->alg.encrypt = crypto_rfc4106_encrypt;
+	inst->alg.decrypt = crypto_rfc4106_decrypt;
 
-	inst->alg.cra_aead.geniv = "seqiv";
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_alg;
 
 out:
-	return inst;
+	return err;
 
 out_drop_alg:
 	crypto_drop_aead(spawn);
 out_free_inst:
 	kfree(inst);
-	inst = ERR_PTR(err);
 	goto out;
 }
 
 static void crypto_rfc4106_free(struct crypto_instance *inst)
 {
-	crypto_drop_spawn(crypto_instance_ctx(inst));
-	kfree(inst);
+	crypto_drop_aead(crypto_instance_ctx(inst));
+	kfree(aead_instance(inst));
 }
 
 static struct crypto_template crypto_rfc4106_tmpl = {
 	.name = "rfc4106",
-	.alloc = crypto_rfc4106_alloc,
+	.create = crypto_rfc4106_create,
 	.free = crypto_rfc4106_free,
 	.module = THIS_MODULE,
 };
 
-static inline struct crypto_rfc4543_req_ctx *crypto_rfc4543_reqctx(
-	struct aead_request *req)
-{
-	unsigned long align = crypto_aead_alignmask(crypto_aead_reqtfm(req));
-
-	return (void *)PTR_ALIGN((u8 *)aead_request_ctx(req), align + 1);
-}
-
 static int crypto_rfc4543_setkey(struct crypto_aead *parent, const u8 *key,
 				 unsigned int keylen)
 {
@@ -1100,83 +1013,35 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent,
 	return crypto_aead_setauthsize(ctx->child, authsize);
 }
 
-static void crypto_rfc4543_done(struct crypto_async_request *areq, int err)
-{
-	struct aead_request *req = areq->data;
-	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
-
-	if (!err) {
-		scatterwalk_map_and_copy(rctx->auth_tag, req->dst,
-					 req->cryptlen,
-					 crypto_aead_authsize(aead), 1);
-	}
-
-	aead_request_complete(req, err);
-}
-
-static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req,
-						 bool enc)
+static int crypto_rfc4543_crypt(struct aead_request *req, bool enc)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead);
-	struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
+	struct crypto_rfc4543_req_ctx *rctx = aead_request_ctx(req);
 	struct aead_request *subreq = &rctx->subreq;
-	struct scatterlist *src = req->src;
-	struct scatterlist *cipher = rctx->cipher;
-	struct scatterlist *payload = rctx->payload;
-	struct scatterlist *assoc = rctx->assoc;
 	unsigned int authsize = crypto_aead_authsize(aead);
-	unsigned int assoclen = req->assoclen;
-	struct page *srcp;
-	u8 *vsrc;
 	u8 *iv = PTR_ALIGN((u8 *)(rctx + 1) + crypto_aead_reqsize(ctx->child),
 			   crypto_aead_alignmask(ctx->child) + 1);
+	int err;
+
+	if (req->src != req->dst) {
+		err = crypto_rfc4543_copy_src_to_dst(req, enc);
+		if (err)
+			return err;
+	}
 
 	memcpy(iv, ctx->nonce, 4);
 	memcpy(iv + 4, req->iv, 8);
 
-	/* construct cipher/plaintext */
-	if (enc)
-		memset(rctx->auth_tag, 0, authsize);
-	else
-		scatterwalk_map_and_copy(rctx->auth_tag, src,
-					 req->cryptlen - authsize,
-					 authsize, 0);
-
-	sg_init_one(cipher, rctx->auth_tag, authsize);
-
-	/* construct the aad */
-	srcp = sg_page(src);
-	vsrc = PageHighMem(srcp) ? NULL : page_address(srcp) + src->offset;
-
-	sg_init_table(payload, 2);
-	sg_set_buf(payload, req->iv, 8);
-	scatterwalk_crypto_chain(payload, src, vsrc == req->iv + 8, 2);
-	assoclen += 8 + req->cryptlen - (enc ? 0 : authsize);
-
-	if (req->assoc->length == req->assoclen) {
-		sg_init_table(assoc, 2);
-		sg_set_page(assoc, sg_page(req->assoc), req->assoc->length,
-			    req->assoc->offset);
-	} else {
-		BUG_ON(req->assoclen > sizeof(rctx->assocbuf));
-
-		scatterwalk_map_and_copy(rctx->assocbuf, req->assoc, 0,
-					 req->assoclen, 0);
-
-		sg_init_table(assoc, 2);
-		sg_set_buf(assoc, rctx->assocbuf, req->assoclen);
-	}
-	scatterwalk_crypto_chain(assoc, payload, 0, 2);
-
 	aead_request_set_tfm(subreq, ctx->child);
-	aead_request_set_callback(subreq, req->base.flags, crypto_rfc4543_done,
-				  req);
-	aead_request_set_crypt(subreq, cipher, cipher, enc ? 0 : authsize, iv);
-	aead_request_set_assoc(subreq, assoc, assoclen);
-
-	return subreq;
+	aead_request_set_callback(subreq, req->base.flags,
+				  req->base.complete, req->base.data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       enc ? 0 : authsize, iv);
+	aead_request_set_ad(subreq, req->assoclen + req->cryptlen -
+				    subreq->cryptlen);
+
+	return enc ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq);
 }
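
The AD length arithmetic is worth spelling out: aead_request_set_crypt() gives the subrequest a cryptlen of 0 on encryption and authsize on decryption, so the AD passed to aead_request_set_ad() covers assoclen + cryptlen on encryption (rfc4543 is GMAC, which authenticates everything and produces only the tag as "ciphertext") and assoclen + cryptlen - authsize on decryption (everything except the trailing tag, which becomes the subrequest's ciphertext to verify). With assoclen = 20, authsize = 16 and 100 bytes of payload, both directions authenticate the same 120 bytes: 20 + 100 - 0 on encryption, 20 + 116 - 16 on decryption.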
 
 static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
@@ -1184,7 +1049,8 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(aead);
 	unsigned int authsize = crypto_aead_authsize(aead);
-	unsigned int nbytes = req->cryptlen - (enc ? 0 : authsize);
+	unsigned int nbytes = req->assoclen + req->cryptlen -
+			      (enc ? 0 : authsize);
 	struct blkcipher_desc desc = {
 		.tfm = ctx->null,
 	};
@@ -1194,49 +1060,20 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
 
 static int crypto_rfc4543_encrypt(struct aead_request *req)
 {
-	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct crypto_rfc4543_req_ctx *rctx = crypto_rfc4543_reqctx(req);
-	struct aead_request *subreq;
-	int err;
-
-	if (req->src != req->dst) {
-		err = crypto_rfc4543_copy_src_to_dst(req, true);
-		if (err)
-			return err;
-	}
-
-	subreq = crypto_rfc4543_crypt(req, true);
-	err = crypto_aead_encrypt(subreq);
-	if (err)
-		return err;
-
-	scatterwalk_map_and_copy(rctx->auth_tag, req->dst, req->cryptlen,
-				 crypto_aead_authsize(aead), 1);
-
-	return 0;
+	return crypto_rfc4543_crypt(req, true);
 }
 
 static int crypto_rfc4543_decrypt(struct aead_request *req)
 {
-	int err;
-
-	if (req->src != req->dst) {
-		err = crypto_rfc4543_copy_src_to_dst(req, false);
-		if (err)
-			return err;
-	}
-
-	req = crypto_rfc4543_crypt(req, false);
-
-	return crypto_aead_decrypt(req);
+	return crypto_rfc4543_crypt(req, false);
 }
 
-static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
+static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_rfc4543_instance_ctx *ictx = crypto_instance_ctx(inst);
+	struct aead_instance *inst = aead_alg_instance(tfm);
+	struct crypto_rfc4543_instance_ctx *ictx = aead_instance_ctx(inst);
 	struct crypto_aead_spawn *spawn = &ictx->aead;
-	struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *aead;
 	struct crypto_blkcipher *null;
 	unsigned long align;
@@ -1246,7 +1083,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
 	if (IS_ERR(aead))
 		return PTR_ERR(aead);
 
-	null = crypto_spawn_blkcipher(&ictx->null.base);
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_aead;
@@ -1256,10 +1093,11 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_aead_alignmask(aead);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = sizeof(struct crypto_rfc4543_req_ctx) +
-				ALIGN(crypto_aead_reqsize(aead),
-				      crypto_tfm_ctx_alignment()) +
-				align + 16;
+	crypto_aead_set_reqsize(
+		tfm,
+		sizeof(struct crypto_rfc4543_req_ctx) +
+		ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
+		align + 12);
 
 	return 0;
 
@@ -1268,107 +1106,98 @@ err_free_aead:
 	return err;
 }
 
-static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm)
 {
-	struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_free_blkcipher(ctx->null);
+	crypto_put_default_null_skcipher();
 }
 
-static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
+static int crypto_rfc4543_create(struct crypto_template *tmpl,
+				struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
+	struct aead_instance *inst;
 	struct crypto_aead_spawn *spawn;
-	struct crypto_alg *alg;
+	struct aead_alg *alg;
 	struct crypto_rfc4543_instance_ctx *ctx;
 	const char *ccm_name;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
-		return ERR_CAST(algt);
+		return PTR_ERR(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	ccm_name = crypto_attr_alg_name(tb[1]);
 	if (IS_ERR(ccm_name))
-		return ERR_CAST(ccm_name);
+		return PTR_ERR(ccm_name);
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
 	if (!inst)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	ctx = crypto_instance_ctx(inst);
+	ctx = aead_instance_ctx(inst);
 	spawn = &ctx->aead;
-	crypto_set_aead_spawn(spawn, inst);
+	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
 	err = crypto_grab_aead(spawn, ccm_name, 0,
 			       crypto_requires_sync(algt->type, algt->mask));
 	if (err)
 		goto out_free_inst;
 
-	alg = crypto_aead_spawn_alg(spawn);
-
-	crypto_set_skcipher_spawn(&ctx->null, inst);
-	err = crypto_grab_skcipher(&ctx->null, "ecb(cipher_null)", 0,
-				   CRYPTO_ALG_ASYNC);
-	if (err)
-		goto out_drop_alg;
-
-	crypto_skcipher_spawn_alg(&ctx->null);
+	alg = crypto_spawn_aead_alg(spawn);
 
 	err = -EINVAL;
 
-	/* We only support 16-byte blocks. */
-	if (alg->cra_aead.ivsize != 16)
-		goto out_drop_ecbnull;
+	/* Underlying IV size must be 12. */
+	if (crypto_aead_alg_ivsize(alg) != 12)
+		goto out_drop_alg;
 
 	/* Not a stream cipher? */
-	if (alg->cra_blocksize != 1)
-		goto out_drop_ecbnull;
+	if (alg->base.cra_blocksize != 1)
+		goto out_drop_alg;
 
 	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
-		     "rfc4543(%s)", alg->cra_name) >= CRYPTO_MAX_ALG_NAME ||
-	    snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		     "rfc4543(%s)", alg->cra_driver_name) >=
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "rfc4543(%s)", alg->base.cra_name) >=
+	    CRYPTO_MAX_ALG_NAME ||
+	    snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "rfc4543(%s)", alg->base.cra_driver_name) >=
 	    CRYPTO_MAX_ALG_NAME)
-		goto out_drop_ecbnull;
+		goto out_drop_alg;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
-	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = 1;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_nivaead_type;
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = 1;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-	inst->alg.cra_aead.ivsize = 8;
-	inst->alg.cra_aead.maxauthsize = 16;
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4543_ctx);
 
-	inst->alg.cra_ctxsize = sizeof(struct crypto_rfc4543_ctx);
+	inst->alg.ivsize = 8;
+	inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);
 
-	inst->alg.cra_init = crypto_rfc4543_init_tfm;
-	inst->alg.cra_exit = crypto_rfc4543_exit_tfm;
+	inst->alg.init = crypto_rfc4543_init_tfm;
+	inst->alg.exit = crypto_rfc4543_exit_tfm;
 
-	inst->alg.cra_aead.setkey = crypto_rfc4543_setkey;
-	inst->alg.cra_aead.setauthsize = crypto_rfc4543_setauthsize;
-	inst->alg.cra_aead.encrypt = crypto_rfc4543_encrypt;
-	inst->alg.cra_aead.decrypt = crypto_rfc4543_decrypt;
+	inst->alg.setkey = crypto_rfc4543_setkey;
+	inst->alg.setauthsize = crypto_rfc4543_setauthsize;
+	inst->alg.encrypt = crypto_rfc4543_encrypt;
+	inst->alg.decrypt = crypto_rfc4543_decrypt;
 
-	inst->alg.cra_aead.geniv = "seqiv";
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_alg;
 
 out:
-	return inst;
+	return err;
 
-out_drop_ecbnull:
-	crypto_drop_skcipher(&ctx->null);
 out_drop_alg:
 	crypto_drop_aead(spawn);
 out_free_inst:
 	kfree(inst);
-	inst = ERR_PTR(err);
 	goto out;
 }
 
@@ -1377,14 +1206,13 @@ static void crypto_rfc4543_free(struct crypto_instance *inst)
 	struct crypto_rfc4543_instance_ctx *ctx = crypto_instance_ctx(inst);
 
 	crypto_drop_aead(&ctx->aead);
-	crypto_drop_skcipher(&ctx->null);
 
-	kfree(inst);
+	kfree(aead_instance(inst));
 }
 
 static struct crypto_template crypto_rfc4543_tmpl = {
 	.name = "rfc4543",
-	.alloc = crypto_rfc4543_alloc,
+	.create = crypto_rfc4543_create,
 	.free = crypto_rfc4543_free,
 	.module = THIS_MODULE,
 };
@@ -1393,10 +1221,12 @@ static int __init crypto_gcm_module_init(void)
 {
 	int err;
 
-	gcm_zeroes = kzalloc(16, GFP_KERNEL);
+	gcm_zeroes = kzalloc(sizeof(*gcm_zeroes), GFP_KERNEL);
 	if (!gcm_zeroes)
 		return -ENOMEM;
 
+	sg_init_one(&gcm_zeroes->sg, gcm_zeroes->buf, sizeof(gcm_zeroes->buf));
+
 	err = crypto_register_template(&crypto_gcm_base_tmpl);
 	if (err)
 		goto out;

+ 2 - 1
crypto/internal.h

@@ -25,7 +25,6 @@
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
-#include <linux/fips.h>
 
 /* Crypto notification events. */
 enum {
@@ -103,6 +102,8 @@ int crypto_register_notifier(struct notifier_block *nb);
 int crypto_unregister_notifier(struct notifier_block *nb);
 int crypto_probing_notify(unsigned long val, void *v);
 
+unsigned int crypto_alg_extsize(struct crypto_alg *alg);
+
 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
 	atomic_inc(&alg->cra_refcnt);

+ 928 - 0
crypto/jitterentropy.c

@@ -0,0 +1,928 @@
+/*
+ * Non-physical true random number generator based on timing jitter.
+ *
+ * Copyright Stephan Mueller <smueller@chronox.de>, 2014
+ *
+ * Design
+ * ======
+ *
+ * See http://www.chronox.de/jent.html
+ *
+ * License
+ * =======
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, and the entire permission notice in its entirety,
+ *    including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior
+ *    written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL2 are
+ * required INSTEAD OF the above restrictions.  (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * This Jitterentropy RNG is based on the jitterentropy library
+ * version 1.1.0 provided at http://www.chronox.de/jent.html
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/fips.h>
+#include <linux/time.h>
+#include <linux/crypto.h>
+#include <crypto/internal/rng.h>
+
+/* The entropy pool */
+struct rand_data {
+	/* all data values that are vital to maintain the security
+	 * of the RNG are marked as SENSITIVE. A user must not
+	 * access that information while the RNG executes its loops to
+	 * calculate the next random value. */
+	__u64 data;		/* SENSITIVE Actual random number */
+	__u64 old_data;		/* SENSITIVE Previous random number */
+	__u64 prev_time;	/* SENSITIVE Previous time stamp */
+#define DATA_SIZE_BITS ((sizeof(__u64)) * 8)
+	__u64 last_delta;	/* SENSITIVE stuck test */
+	__s64 last_delta2;	/* SENSITIVE stuck test */
+	unsigned int stuck:1;	/* Time measurement stuck */
+	unsigned int osr;	/* Oversample rate */
+	unsigned int stir:1;		/* Post-processing stirring */
+	unsigned int disable_unbias:1;	/* Deactivate Von Neumann unbias */
+#define JENT_MEMORY_BLOCKS 64
+#define JENT_MEMORY_BLOCKSIZE 32
+#define JENT_MEMORY_ACCESSLOOPS 128
+#define JENT_MEMORY_SIZE (JENT_MEMORY_BLOCKS*JENT_MEMORY_BLOCKSIZE)
+	unsigned char *mem;	/* Memory access location with size of
+				 * memblocks * memblocksize */
+	unsigned int memlocation; /* Pointer to byte in *mem */
+	unsigned int memblocks;	/* Number of memory blocks in *mem */
+	unsigned int memblocksize; /* Size of one memory block in bytes */
+	unsigned int memaccessloops; /* Number of memory accesses per random
+				      * bit generation */
+};
+
+/* Flags that can be used to initialize the RNG */
+#define JENT_DISABLE_STIR (1<<0) /* Disable stirring the entropy pool */
+#define JENT_DISABLE_UNBIAS (1<<1) /* Disable the Von Neumann unbiaser */
+#define JENT_DISABLE_MEMORY_ACCESS (1<<2) /* Disable memory access for more
+					   * entropy, saves MEMORY_SIZE RAM for
+					   * entropy collector */
+
+#define DRIVER_NAME     "jitterentropy"
+
+/* -- error codes for init function -- */
+#define JENT_ENOTIME		1 /* Timer service not available */
+#define JENT_ECOARSETIME	2 /* Timer too coarse for RNG */
+#define JENT_ENOMONOTONIC	3 /* Timer is not monotonically increasing */
+#define JENT_EMINVARIATION	4 /* Timer variations too small for RNG */
+#define JENT_EVARVAR		5 /* Timer does not produce variations of
+				   * variations (2nd derivation of time is
+				   * zero). */
+#define JENT_EMINVARVAR		6 /* Timer variations of variations are too
+				   * small. */
+
+/***************************************************************************
+ * Helper functions
+ ***************************************************************************/
+
+static inline void jent_get_nstime(__u64 *out)
+{
+	struct timespec ts;
+	__u64 tmp = 0;
+
+	tmp = random_get_entropy();
+
+	/*
+	 * If random_get_entropy does not return a value (which is possible on,
+	 * for example, MIPS), invoke __getnstimeofday
+	 * hoping that there are timers we can work with.
+	 *
+	 * The list of available timers can be obtained from
+	 * /sys/devices/system/clocksource/clocksource0/available_clocksource
+	 * and are registered with clocksource_register()
+	 */
+	if ((0 == tmp) &&
+	   (0 == __getnstimeofday(&ts))) {
+		tmp = ts.tv_sec;
+		tmp = tmp << 32;
+		tmp = tmp | ts.tv_nsec;
+	}
+
+	*out = tmp;
+}
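
random_get_entropy() typically maps to the CPU cycle counter (e.g. rdtsc); the fallback packs the seconds into the high 32 bits and the nanoseconds into the low 32. A userspace analogue of that fallback path, assuming POSIX clock_gettime():

#include <stdint.h>
#include <time.h>

/* High-resolution timestamp: seconds in the high 32 bits, nanoseconds
 * in the low 32 -- the same packing as the fallback above. */
static uint64_t get_nstime(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		return ((uint64_t)ts.tv_sec << 32) | (uint32_t)ts.tv_nsec;
	return 0;
}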
+
+
+/**
+ * Update of the loop count used for the next round of
+ * an entropy collection.
+ *
+ * Input:
+ * @ec entropy collector struct -- may be NULL
+ * @bits is the number of low bits of the timer to consider
+ * @min is the number of bits we shift the timer value to the right at
+ *	the end to make sure we have a guaranteed minimum value
+ *
+ * @return Newly calculated loop counter
+ */
+static __u64 jent_loop_shuffle(struct rand_data *ec,
+			       unsigned int bits, unsigned int min)
+{
+	__u64 time = 0;
+	__u64 shuffle = 0;
+	unsigned int i = 0;
+	unsigned int mask = (1<<bits) - 1;
+
+	jent_get_nstime(&time);
+	/*
+	 * mix the current state of the random number into the shuffle
+	 * calculation to balance that shuffle a bit more
+	 */
+	if (ec)
+		time ^= ec->data;
+	/*
+	 * we fold the time value as much as possible to ensure that as many
+	 * bits of the time stamp are included as possible
+	 */
+	for (i = 0; (DATA_SIZE_BITS / bits) > i; i++) {
+		shuffle ^= time & mask;
+		time = time >> bits;
+	}
+
+	/*
+	 * We add a lower boundary value to ensure we have a minimum
+	 * RNG loop count.
+	 */
+	return (shuffle + (1<<min));
+}
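
A worked example for the values jent_fold_time() passes (bits = 4, min = 0): mask = 0xf, and the 64-bit value is consumed as sixteen 4-bit nibbles XORed together. For time = 0x1234 (ignoring the ec->data mix-in), shuffle = 0x4 ^ 0x3 ^ 0x2 ^ 0x1 = 0x4, so the function returns 4 + (1 << 0) = 5 fold loops; the (1 << min) term guarantees the loop count is never zero.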
+
+/***************************************************************************
+ * Noise sources
+ ***************************************************************************/
+
+/*
+ * The disabling of the optimizations is performed as documented and assessed
+ * thoroughly in http://www.chronox.de/jent.html. However, instead of disabling
+ * the optimization of the entire C file, only the main functions the jitter is
+ * measured for are not optimized. These functions include the noise sources as
+ * well as the main functions triggering the noise sources. As the time
+ * measurement is done from one invocation of the jitter noise source to the
+ * next, even the execution jitter of the code invoking the noise sources
+ * contributes to the overall randomness as well. The behavior of the RNG and
+ * its statistical characteristics when only the mentioned functions are left
+ * unoptimized are almost equal to those of a completely non-optimized RNG
+ * compilation, as tested with the test tools provided at the web site
+ * mentioned above.
+ */
+
+/**
+ * CPU Jitter noise source -- this is the noise source based on the CPU
+ *			      execution time jitter
+ *
+ * This function folds the time value into one-bit units by iterating
+ * through the DATA_SIZE_BITS bit time value as follows: assume our time value
+ * is 0xabcd
+ * 1st loop, 1st shift generates 0xd000
+ * 1st loop, 2nd shift generates 0x000d
+ * 2nd loop, 1st shift generates 0xcd00
+ * 2nd loop, 2nd shift generates 0x000c
+ * 3rd loop, 1st shift generates 0xbcd0
+ * 3rd loop, 2nd shift generates 0x000b
+ * 4th loop, 1st shift generates 0xabcd
+ * 4th loop, 2nd shift generates 0x000a
+ * Now, the values at the end of the 2nd shifts are XORed together.
+ *
+ * The code is deliberately inefficient and shall stay that way. This function
+ * is the reason why the code must be compiled without optimization. This
+ * function not only acts as a folding operation; its execution
+ * is used to measure the CPU execution time jitter. Any change to the loop in
+ * this function implies that careful retesting must be done.
+ *
+ * Input:
+ * @ec entropy collector struct -- may be NULL
+ * @time time stamp to be folded
+ * @loop_cnt if a value not equal to 0 is set, use the given value as number of
+ *	     loops to perform the folding
+ *
+ * Output:
+ * @folded result of folding operation
+ *
+ * @return Number of loops the folding operation is performed
+ */
+#pragma GCC push_options
+#pragma GCC optimize ("-O0")
+static __u64 jent_fold_time(struct rand_data *ec, __u64 time,
+			    __u64 *folded, __u64 loop_cnt)
+{
+	unsigned int i;
+	__u64 j = 0;
+	__u64 new = 0;
+#define MAX_FOLD_LOOP_BIT 4
+#define MIN_FOLD_LOOP_BIT 0
+	__u64 fold_loop_cnt =
+		jent_loop_shuffle(ec, MAX_FOLD_LOOP_BIT, MIN_FOLD_LOOP_BIT);
+
+	/*
+	 * testing purposes -- allow test app to set the counter, not
+	 * needed during runtime
+	 */
+	if (loop_cnt)
+		fold_loop_cnt = loop_cnt;
+	for (j = 0; j < fold_loop_cnt; j++) {
+		new = 0;
+		for (i = 1; (DATA_SIZE_BITS) >= i; i++) {
+			__u64 tmp = time << (DATA_SIZE_BITS - i);
+
+			tmp = tmp >> (DATA_SIZE_BITS - 1);
+			new ^= tmp;
+		}
+	}
+	*folded = new;
+	return fold_loop_cnt;
+}
+#pragma GCC pop_options
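
Functionally (setting aside its real purpose of consuming measurable execution time), one pass of the inner loop XORs every bit of the time delta together, i.e. it computes the delta's parity; the outer loop merely repeats that work. A standalone check of the equivalence:

#include <assert.h>
#include <stdint.h>

/* One pass of the folding loop above: XOR of all 64 bits of 'time'. */
static uint64_t fold_once(uint64_t time)
{
	uint64_t acc = 0;

	for (unsigned int i = 1; i <= 64; i++)
		acc ^= (time << (64 - i)) >> 63;
	return acc;
}

int main(void)
{
	/* 0xabcd has ten set bits -> parity 0; 0x1 -> parity 1. */
	assert(fold_once(0xabcd) == (uint64_t)__builtin_parityll(0xabcd));
	assert(fold_once(0x1) == 1);
	return 0;
}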
+
+/**
+ * Memory Access noise source -- this is a noise source based on variations in
+ *				 memory access times
+ *
+ * This function performs memory accesses which will add to the timing
+ * variations due to an unknown amount of CPU wait states that need to be
+ * added when accessing memory. The memory size should be larger than the L1
+ * caches as outlined in the documentation and the associated testing.
+ *
+ * The L1 cache has a very high bandwidth, although its access rate is usually
+ * slower than accessing CPU registers. Therefore, L1 accesses only add minimal
+ * variations as the CPU hardly has to wait. Starting with L2, significant
+ * variations are added because L2 typically does not belong to the CPU any more
+ * and therefore a wider range of CPU wait states is necessary for accesses.
+ * L3 and real memory accesses have an even wider range of wait states. However,
+ * to reliably access either L3 or memory, the ec->mem memory must be quite
+ * large which is usually not desirable.
+ *
+ * Input:
+ * @ec Reference to the entropy collector with the memory access data -- if
+ *     the reference to the memory block to be accessed is NULL, this noise
+ *     source is disabled
+ * @loop_cnt if a value not equal to 0 is set, use the given value as number of
+ *	     loops to perform the folding
+ *
+ * @return Number of memory access operations
+ */
+#pragma GCC push_options
+#pragma GCC optimize ("-O0")
+static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
+{
+	unsigned char *tmpval = NULL;
+	unsigned int wrap = 0;
+	__u64 i = 0;
+#define MAX_ACC_LOOP_BIT 7
+#define MIN_ACC_LOOP_BIT 0
+	__u64 acc_loop_cnt =
+		jent_loop_shuffle(ec, MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT);
+
+	if (NULL == ec || NULL == ec->mem)
+		return 0;
+	wrap = ec->memblocksize * ec->memblocks;
+
+	/*
+	 * testing purposes -- allow test app to set the counter, not
+	 * needed during runtime
+	 */
+	if (loop_cnt)
+		acc_loop_cnt = loop_cnt;
+
+	for (i = 0; i < (ec->memaccessloops + acc_loop_cnt); i++) {
+		tmpval = ec->mem + ec->memlocation;
+		/*
+		 * memory access: just add 1 to one byte,
+		 * wrap at 255 -- memory access implies read
+		 * from and write to memory location
+		 */
+		*tmpval = (*tmpval + 1) & 0xff;
+		/*
+		 * Addition of memblocksize - 1 to pointer
+		 * with wrap around logic to ensure that every
+		 * memory location is hit evenly
+		 */
+		ec->memlocation = ec->memlocation + ec->memblocksize - 1;
+		ec->memlocation = ec->memlocation % wrap;
+	}
+	return i;
+}
+#pragma GCC pop_options
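
The stride of memblocksize - 1 with a wrap at memblocksize * memblocks visits
every byte of the buffer: for the power-of-two buffer sizes used here the odd
stride is coprime to the wrap value. A userspace sketch verifying the coverage
claim (the sizes are made up for the demo, not the kernel defaults):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const unsigned int blocksize = 32, blocks = 4;
	const unsigned int wrap = blocksize * blocks;
	unsigned char hit[128];		/* == wrap */
	unsigned int loc = 0, i, misses = 0;

	memset(hit, 0, sizeof(hit));
	for (i = 0; i < wrap; i++) {
		hit[loc] = 1;
		loc = (loc + blocksize - 1) % wrap;	/* same stride/wrap */
	}
	for (i = 0; i < wrap; i++)
		misses += !hit[i];
	printf("unvisited locations: %u of %u\n", misses, wrap);	/* 0 */
	return 0;
}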
+
+/***************************************************************************
+ * Start of entropy processing logic
+ ***************************************************************************/
+
+/**
+ * Stuck test by checking the:
+ *	1st derivative of the jitter measurement (time delta)
+ *	2nd derivative of the jitter measurement (delta of time deltas)
+ *	3rd derivative of the jitter measurement (delta of delta of time deltas)
+ *
+ * All values must always be non-zero.
+ *
+ * Input:
+ * @ec Reference to entropy collector
+ * @current_delta Jitter time delta
+ *
+ * Output:
+ * ec->stuck is set to 1 when the jitter measurement is stuck (reject bit)
+ *	    and is left untouched otherwise (good bit); the function itself
+ *	    returns nothing.
+ */
+static void jent_stuck(struct rand_data *ec, __u64 current_delta)
+{
+	__s64 delta2 = ec->last_delta - current_delta;
+	__s64 delta3 = delta2 - ec->last_delta2;
+
+	ec->last_delta = current_delta;
+	ec->last_delta2 = delta2;
+
+	if (!current_delta || !delta2 || !delta3)
+		ec->stuck = 1;
+}
+
+/**
+ * This is the heart of the entropy generation: calculate time deltas and
+ * use the CPU jitter in the time deltas. The jitter is folded into one
+ * bit. You can call this function the "random bit generator" as it
+ * produces one random bit per invocation.
+ *
+ * WARNING: ensure that ->prev_time is primed before using the output
+ *	    of this function! This can be done by calling this function
+ *	    and not using its result.
+ *
+ * Input:
+ * @entropy_collector Reference to entropy collector
+ *
+ * @return One random bit
+ */
+#pragma GCC push_options
+#pragma GCC optimize ("-O0")
+static __u64 jent_measure_jitter(struct rand_data *ec)
+{
+	__u64 time = 0;
+	__u64 data = 0;
+	__u64 current_delta = 0;
+
+	/* Invoke one noise source before time measurement to add variations */
+	jent_memaccess(ec, 0);
+
+	/*
+	 * Get time stamp and calculate time delta to previous
+	 * invocation to measure the timing variations
+	 */
+	jent_get_nstime(&time);
+	current_delta = time - ec->prev_time;
+	ec->prev_time = time;
+
+	/* Now call the next noise source, which also folds the data */
+	jent_fold_time(ec, current_delta, &data, 0);
+
+	/*
+	 * Check whether we have a stuck measurement. The enforcement
+	 * is performed after the stuck value has been mixed into the
+	 * entropy pool.
+	 */
+	jent_stuck(ec, current_delta);
+
+	return data;
+}
+#pragma GCC pop_options
+
+/**
+ * Von Neumann unbias as explained in RFC 4086 section 4.2. As shown in the
+ * documentation of that RNG, the bits from jent_measure_jitter are considered
+ * independent, which implies that the Von Neumann unbias operation is
+ * applicable. A proof of the Von Neumann unbias operation to remove skews is
+ * given in the document "A proposal for: Functionality classes for random
+ * number generators", version 2.0 by Werner Schindler, section 5.4.1.
+ *
+ * Input:
+ * @entropy_collector Reference to entropy collector
+ *
+ * @return One random bit
+ */
+static __u64 jent_unbiased_bit(struct rand_data *entropy_collector)
+{
+	do {
+		__u64 a = jent_measure_jitter(entropy_collector);
+		__u64 b = jent_measure_jitter(entropy_collector);
+
+		if (a == b)
+			continue;
+		if (1 == a)
+			return 1;
+		else
+			return 0;
+	} while (1);
+}
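
The classic property being relied on: for independent draws from a biased bit
source, the pairs 01 and 10 are equally likely, so emitting the first bit of
an unequal pair yields an unbiased stream. A self-contained userspace demo
(the 75% bias via rand() is purely illustrative):

#include <stdio.h>
#include <stdlib.h>

static int biased_bit(void)
{
	return (rand() % 4) != 0;	/* P(1) = 0.75 */
}

static int unbiased_bit(void)
{
	for (;;) {
		int a = biased_bit();
		int b = biased_bit();

		if (a != b)
			return a;	/* 01 and 10 are equally likely */
	}
}

int main(void)
{
	int i, ones = 0;

	for (i = 0; i < 100000; i++)
		ones += unbiased_bit();
	printf("ones: %d of 100000 (expect ~50000)\n", ones);
	return 0;
}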
+
+/**
+ * Shuffle the pool by mixing a value into it with a bijective function (XOR).
+ *
+ * The function generates a mixer value that depends on which bits are set in
+ * the random number generated by the entropy source and on where those bits
+ * are located. Therefore, based on the generated random number, this mixer
+ * value can take one of 2**64 different values. The mixer value starts out as
+ * the third and fourth SHA-1 constants and is repeatedly combined with a
+ * constant derived from the first two; the resulting mixer value is then
+ * XORed into the random number.
+ *
+ * The mixer value is not assumed to contain any entropy. But due to the XOR
+ * operation, it can also not destroy any entropy present in the entropy pool.
+ *
+ * Input:
+ * @entropy_collector Reference to entropy collector
+ */
+static void jent_stir_pool(struct rand_data *entropy_collector)
+{
+	/*
+	 * to shut up GCC on 32 bit, we have to initialize the 64 bit variable
+	 * with two 32 bit variables
+	 */
+	union c {
+		__u64 u64;
+		__u32 u32[2];
+	};
+	/*
+	 * This constant is derived from the first two 32 bit initialization
+	 * vectors of SHA-1 as defined in FIPS 180-4 section 5.3.1
+	 */
+	union c constant;
+	/*
+	 * The start value of the mixer variable is derived from the third
+	 * and fourth 32 bit initialization vector of SHA-1 as defined in
+	 * FIPS 180-4 section 5.3.1
+	 */
+	union c mixer;
+	unsigned int i = 0;
+
+	/*
+	 * Store the SHA-1 constants in reverse order to make up the 64 bit
+	 * value -- this applies to a little endian system; on a big endian
+	 * system, it is reversed as expected. But this really does not matter
+	 * as we do not rely on the specific numbers. We just pick the SHA-1
+	 * constants as they have a good mix of set and unset bits.
+	 */
+	constant.u32[1] = 0x67452301;
+	constant.u32[0] = 0xefcdab89;
+	mixer.u32[1] = 0x98badcfe;
+	mixer.u32[0] = 0x10325476;
+
+	for (i = 0; i < DATA_SIZE_BITS; i++) {
+		/*
+		 * get the i-th bit of the input random number and only XOR
+		 * the constant into the mixer value when that bit is set
+		 */
+		if ((entropy_collector->data >> i) & 1)
+			mixer.u64 ^= constant.u64;
+		mixer.u64 = rol64(mixer.u64, 1);
+	}
+	entropy_collector->data ^= mixer.u64;
+}
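
Because the final step is a plain XOR of a value derived only from the pool
itself, the stir is a bijection on the pool state and cannot lower its
entropy. A userspace sketch of the mixer construction (assuming the little
endian layout that the union initialisation above produces; the pool value is
made up):

#include <stdint.h>
#include <stdio.h>

static uint64_t rol64(uint64_t x, unsigned int n)
{
	return (x << n) | (x >> (64 - n));
}

int main(void)
{
	const uint64_t constant = 0x67452301efcdab89ULL;	/* SHA-1 H0,H1 */
	uint64_t mixer = 0x98badcfe10325476ULL;			/* SHA-1 H2,H3 */
	uint64_t data = 0xdeadbeefcafef00dULL;			/* stand-in pool */
	unsigned int i;

	for (i = 0; i < 64; i++) {
		if ((data >> i) & 1)
			mixer ^= constant;
		mixer = rol64(mixer, 1);
	}
	printf("stirred pool: %016llx\n", (unsigned long long)(data ^ mixer));
	return 0;
}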
+
+/**
+ * Generator of one 64 bit random number
+ * Function fills rand_data->data
+ *
+ * Input:
+ * @ec Reference to entropy collector
+ */
+#pragma GCC push_options
+#pragma GCC optimize ("-O0")
+static void jent_gen_entropy(struct rand_data *ec)
+{
+	unsigned int k = 0;
+
+	/* priming of the ->prev_time value */
+	jent_measure_jitter(ec);
+
+	while (1) {
+		__u64 data = 0;
+
+		if (ec->disable_unbias == 1)
+			data = jent_measure_jitter(ec);
+		else
+			data = jent_unbiased_bit(ec);
+
+		/* enforcement of the jent_stuck test */
+		if (ec->stuck) {
+			/*
+			 * We only mix in the bit considered not appropriate
+			 * without the LFSR. The reason is that if we apply
+			 * the LFSR and we do not rotate, the 2nd bit with LFSR
+			 * will cancel out the first LFSR application on the
+			 * bad bit.
+			 *
+			 * And we do not rotate as we apply the next bit to the
+			 * current bit location again.
+			 */
+			ec->data ^= data;
+			ec->stuck = 0;
+			continue;
+		}
+
+		/*
+		 * Fibonacci LFSR with the polynomial
+		 *  x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is
+		 *  primitive according to
+		 *   http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf
+		 * (the shift values are the polynomial exponents minus one
+		 * due to counting bits from 0 to 63). As the current
+		 * position is always the LSB, the polynomial only needs
+		 * to shift data in from the left without wrap.
+		 */
+		ec->data ^= data;
+		ec->data ^= ((ec->data >> 63) & 1);
+		ec->data ^= ((ec->data >> 60) & 1);
+		ec->data ^= ((ec->data >> 55) & 1);
+		ec->data ^= ((ec->data >> 30) & 1);
+		ec->data ^= ((ec->data >> 27) & 1);
+		ec->data ^= ((ec->data >> 22) & 1);
+		ec->data = rol64(ec->data, 1);
+
+		/*
+		 * We multiply the loop value by ->osr to obtain the
+		 * oversampling rate requested by the caller
+		 */
+		if (++k >= (DATA_SIZE_BITS * ec->osr))
+			break;
+	}
+	if (ec->stir)
+		jent_stir_pool(ec);
+}
+#pragma GCC pop_options
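
One step of that LFSR in isolation: the new bit goes into the LSB together
with the tap bits named by the polynomial exponents minus one, then the word
rotates left. A userspace sketch (the seed and input bits are arbitrary):

#include <stdint.h>
#include <stdio.h>

static uint64_t rol64(uint64_t x, unsigned int n)
{
	return (x << n) | (x >> (64 - n));
}

static uint64_t lfsr_step(uint64_t state, uint64_t bit)
{
	state ^= bit;
	state ^= (state >> 63) & 1;	/* tap for x^64 */
	state ^= (state >> 60) & 1;	/* tap for x^61 */
	state ^= (state >> 55) & 1;	/* tap for x^56 */
	state ^= (state >> 30) & 1;	/* tap for x^31 */
	state ^= (state >> 27) & 1;	/* tap for x^28 */
	state ^= (state >> 22) & 1;	/* tap for x^23 */
	return rol64(state, 1);
}

int main(void)
{
	uint64_t s = 1;
	int i;

	for (i = 0; i < 4; i++) {
		s = lfsr_step(s, i & 1);
		printf("%016llx\n", (unsigned long long)s);
	}
	return 0;
}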
+
+/**
+ * The continuous test required by FIPS 140-2 -- the function automatically
+ * primes the test if needed.
+ *
+ * The function has no return value; if two consecutive 64 bit outputs are
+ * identical, it panics the kernel, which is the required reaction to a
+ * failed FIPS 140-2 continuous test.
+ */
+static void jent_fips_test(struct rand_data *ec)
+{
+	if (!fips_enabled)
+		return;
+
+	/* prime the FIPS test */
+	if (!ec->old_data) {
+		ec->old_data = ec->data;
+		jent_gen_entropy(ec);
+	}
+
+	if (ec->data == ec->old_data)
+		panic(DRIVER_NAME ": Duplicate output detected\n");
+
+	ec->old_data = ec->data;
+}
+
+
+/**
+ * Entry function: Obtain entropy for the caller.
+ *
+ * This function invokes the entropy gathering logic as often as needed to
+ * generate as many bytes as requested by the caller. The entropy gathering
+ * logic creates 64 bits per invocation.
+ *
+ * This function truncates the last 64 bit entropy value output to the exact
+ * size specified by the caller.
+ *
+ * Input:
+ * @ec Reference to entropy collector
+ * @data pointer to buffer for storing random data -- buffer must already
+ *	 exist
+ * @len size of the buffer, specifying also the requested number of random
+ *	 bytes
+ *
+ * @return 0 when the request is fulfilled or an error code
+ *
+ * The following error codes can occur:
+ *	-EINVAL	entropy_collector is NULL
+ */
+static ssize_t jent_read_entropy(struct rand_data *ec, u8 *data, size_t len)
+{
+	u8 *p = data;
+
+	if (!ec)
+		return -EINVAL;
+
+	while (0 < len) {
+		size_t tocopy;
+
+		jent_gen_entropy(ec);
+		jent_fips_test(ec);
+		if ((DATA_SIZE_BITS / 8) < len)
+			tocopy = (DATA_SIZE_BITS / 8);
+		else
+			tocopy = len;
+		memcpy(p, &ec->data, tocopy);
+
+		len -= tocopy;
+		p += tocopy;
+	}
+
+	return 0;
+}
+
+/***************************************************************************
+ * Initialization logic
+ ***************************************************************************/
+
+static struct rand_data *jent_entropy_collector_alloc(unsigned int osr,
+						      unsigned int flags)
+{
+	struct rand_data *entropy_collector;
+
+	entropy_collector = kzalloc(sizeof(struct rand_data), GFP_KERNEL);
+	if (!entropy_collector)
+		return NULL;
+
+	if (!(flags & JENT_DISABLE_MEMORY_ACCESS)) {
+		/* Allocate memory for adding variations based on memory
+		 * access
+		 */
+		entropy_collector->mem = kzalloc(JENT_MEMORY_SIZE, GFP_KERNEL);
+		if (!entropy_collector->mem) {
+			kfree(entropy_collector);
+			return NULL;
+		}
+		entropy_collector->memblocksize = JENT_MEMORY_BLOCKSIZE;
+		entropy_collector->memblocks = JENT_MEMORY_BLOCKS;
+		entropy_collector->memaccessloops = JENT_MEMORY_ACCESSLOOPS;
+	}
+
+	/* verify and set the oversampling rate */
+	if (0 == osr)
+		osr = 1; /* minimum sampling rate is 1 */
+	entropy_collector->osr = osr;
+
+	entropy_collector->stir = 1;
+	if (flags & JENT_DISABLE_STIR)
+		entropy_collector->stir = 0;
+	if (flags & JENT_DISABLE_UNBIAS)
+		entropy_collector->disable_unbias = 1;
+
+	/* fill the data pad with non-zero values */
+	jent_gen_entropy(entropy_collector);
+
+	return entropy_collector;
+}
+
+static void jent_entropy_collector_free(struct rand_data *entropy_collector)
+{
+	if (!entropy_collector)
+		return;
+	/* kzfree() tolerates NULL, so no separate check for ->mem is needed */
+	kzfree(entropy_collector->mem);
+	entropy_collector->mem = NULL;
+	kzfree(entropy_collector);
+}
+
+static int jent_entropy_init(void)
+{
+	int i;
+	__u64 delta_sum = 0;
+	__u64 old_delta = 0;
+	int time_backwards = 0;
+	int count_var = 0;
+	int count_mod = 0;
+
+	/* We could perform statistical tests here, but the problem is
+	 * that we only have a few loop counts to do testing. These
+	 * loop counts may show some slight skew, and we would produce
+	 * false positives.
+	 *
+	 * Moreover, only old systems show potentially problematic
+	 * jitter entropy that could potentially be caught here. But
+	 * the RNG is intended for hardware that is available or widely
+	 * used, not for old systems that have long fallen out of use.
+	 * Thus, no statistical tests.
+	 */
+
+	/*
+	 * We could add a check for system capabilities such as clock_getres or
+	 * check for CONFIG_X86_TSC, but it does not make much sense as the
+	 * following sanity checks verify that we have a high-resolution
+	 * timer.
+	 */
+	/*
+	 * TESTLOOPCOUNT needs some loops to identify edge systems. 100 is
+	 * definitely too little.
+	 */
+#define TESTLOOPCOUNT 300
+#define CLEARCACHE 100
+	for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) {
+		__u64 time = 0;
+		__u64 time2 = 0;
+		__u64 folded = 0;
+		__u64 delta = 0;
+		unsigned int lowdelta = 0;
+
+		jent_get_nstime(&time);
+		jent_fold_time(NULL, time, &folded, 1<<MIN_FOLD_LOOP_BIT);
+		jent_get_nstime(&time2);
+
+		/* test whether timer works */
+		if (!time || !time2)
+			return JENT_ENOTIME;
+		delta = time2 - time;
+		/*
+		 * test whether timer is fine grained enough to provide
+		 * delta even when called shortly after each other -- this
+		 * implies that we also have a high resolution timer
+		 */
+		if (!delta)
+			return JENT_ECOARSETIME;
+
+		/*
+		 * up to here we did not modify any variable that will be
+		 * evaluated later, but we already performed some work. Thus we
+		 * already have had an impact on the caches, branch prediction,
+		 * etc. with the goal to clear it to get the worst case
+		 * measurements.
+		 */
+		if (CLEARCACHE > i)
+			continue;
+
+		/* test whether we have an increasing timer */
+		if (!(time2 > time))
+			time_backwards++;
+
+		/*
+		 * Avoid modulo of 64 bit integer to allow code to compile
+		 * on 32 bit architectures.
+		 */
+		lowdelta = time2 - time;
+		if (!(lowdelta % 100))
+			count_mod++;
+
+		/*
+		 * ensure that we have a varying delta timer which is necessary
+		 * for the calculation of entropy -- perform this check
+		 * only after the first loop is executed as we need to prime
+		 * the old_data value
+		 */
+		if (i) {
+			if (delta != old_delta)
+				count_var++;
+			if (delta > old_delta)
+				delta_sum += (delta - old_delta);
+			else
+				delta_sum += (old_delta - delta);
+		}
+		old_delta = delta;
+	}
+
+	/*
+	 * we allow up to three times the time running backwards.
+	 * CLOCK_REALTIME is affected by adjtime and NTP operations. Thus,
+	 * if such an operation just happens to interfere with our test, it
+	 * should not fail. The value of 3 should cover the NTP case being
+	 * performed during our test run.
+	 */
+	if (3 < time_backwards)
+		return JENT_ENOMONOTONIC;
+	/* Error if the time variances are always identical */
+	if (!delta_sum)
+		return JENT_EVARVAR;
+
+	/*
+	 * Variations of the time deltas must on average be larger than 1
+	 * to ensure that the entropy estimation implied by a minimum
+	 * variation of 1 is preserved
+	 */
+	if (delta_sum <= 1)
+		return JENT_EMINVARVAR;
+
+	/*
+	 * Ensure that the time stamp delta is not a multiple of 100 for at
+	 * least 10% of all checks -- on some platforms, the counter
+	 * increments in multiples of 100, but not always
+	 */
+	if ((TESTLOOPCOUNT/10 * 9) < count_mod)
+		return JENT_ECOARSETIME;
+
+	return 0;
+}
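
The first two checks of the loop translate directly to userspace: read the
clock twice back to back, require both reads to be non-zero and the delta to
be non-zero. A sketch using clock_gettime() (the kernel code reads the time
through its own jent_get_nstime() helper instead):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t nstime(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
	uint64_t t1 = nstime();
	uint64_t t2 = nstime();

	if (!t1 || !t2)
		return 1;	/* cf. JENT_ENOTIME: timer does not work */
	if (t2 == t1)
		return 2;	/* cf. JENT_ECOARSETIME: timer too coarse */

	printf("delta = %llu ns\n", (unsigned long long)(t2 - t1));
	return 0;
}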
+
+/***************************************************************************
+ * Kernel crypto API interface
+ ***************************************************************************/
+
+struct jitterentropy {
+	spinlock_t jent_lock;
+	struct rand_data *entropy_collector;
+};
+
+static int jent_kcapi_init(struct crypto_tfm *tfm)
+{
+	struct jitterentropy *rng = crypto_tfm_ctx(tfm);
+	int ret = 0;
+
+	rng->entropy_collector = jent_entropy_collector_alloc(1, 0);
+	if (!rng->entropy_collector)
+		ret = -ENOMEM;
+
+	spin_lock_init(&rng->jent_lock);
+	return ret;
+}
+
+static void jent_kcapi_cleanup(struct crypto_tfm *tfm)
+{
+	struct jitterentropy *rng = crypto_tfm_ctx(tfm);
+
+	spin_lock(&rng->jent_lock);
+	if (rng->entropy_collector)
+		jent_entropy_collector_free(rng->entropy_collector);
+	rng->entropy_collector = NULL;
+	spin_unlock(&rng->jent_lock);
+}
+
+static int jent_kcapi_random(struct crypto_rng *tfm,
+			     const u8 *src, unsigned int slen,
+			     u8 *rdata, unsigned int dlen)
+{
+	struct jitterentropy *rng = crypto_rng_ctx(tfm);
+	int ret = 0;
+
+	spin_lock(&rng->jent_lock);
+	ret = jent_read_entropy(rng->entropy_collector, rdata, dlen);
+	spin_unlock(&rng->jent_lock);
+
+	return ret;
+}
+
+static int jent_kcapi_reset(struct crypto_rng *tfm,
+			    const u8 *seed, unsigned int slen)
+{
+	return 0;
+}
+
+static struct rng_alg jent_alg = {
+	.generate		= jent_kcapi_random,
+	.seed			= jent_kcapi_reset,
+	.seedsize		= 0,
+	.base			= {
+		.cra_name               = "jitterentropy_rng",
+		.cra_driver_name        = "jitterentropy_rng",
+		.cra_priority           = 100,
+		.cra_ctxsize            = sizeof(struct jitterentropy),
+		.cra_module             = THIS_MODULE,
+		.cra_init               = jent_kcapi_init,
+		.cra_exit               = jent_kcapi_cleanup,
+
+	}
+};
+
+static int __init jent_mod_init(void)
+{
+	int ret = 0;
+
+	ret = jent_entropy_init();
+	if (ret) {
+		pr_info(DRIVER_NAME ": Initialization failed with host not compliant with requirements: %d\n", ret);
+		return -EFAULT;
+	}
+	return crypto_register_rng(&jent_alg);
+}
+
+static void __exit jent_mod_exit(void)
+{
+	crypto_unregister_rng(&jent_alg);
+}
+
+module_init(jent_mod_init);
+module_exit(jent_mod_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
+MODULE_DESCRIPTION("Non-physical True Random Number Generator based on CPU Jitter");
+MODULE_ALIAS_CRYPTO("jitterentropy_rng");

+ 0 - 66
crypto/krng.c

@@ -1,66 +0,0 @@
-/*
- * RNG implementation using standard kernel RNG.
- *
- * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- */
-
-#include <crypto/internal/rng.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/random.h>
-
-static int krng_get_random(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen)
-{
-	get_random_bytes(rdata, dlen);
-	return 0;
-}
-
-static int krng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
-{
-	return 0;
-}
-
-static struct crypto_alg krng_alg = {
-	.cra_name		= "stdrng",
-	.cra_driver_name	= "krng",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= 0,
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= krng_get_random,
-			.rng_reset		= krng_reset,
-			.seedsize		= 0,
-		}
-	}
-};
-
-
-/* Module initalization */
-static int __init krng_mod_init(void)
-{
-	return crypto_register_alg(&krng_alg);
-}
-
-static void __exit krng_mod_fini(void)
-{
-	crypto_unregister_alg(&krng_alg);
-	return;
-}
-
-module_init(krng_mod_init);
-module_exit(krng_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Kernel Random Number Generator");
-MODULE_ALIAS_CRYPTO("stdrng");
-MODULE_ALIAS_CRYPTO("krng");

+ 4 - 4
crypto/md5.c

@@ -51,10 +51,10 @@ static int md5_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = 0x67452301;
-	mctx->hash[1] = 0xefcdab89;
-	mctx->hash[2] = 0x98badcfe;
-	mctx->hash[3] = 0x10325476;
+	mctx->hash[0] = MD5_H0;
+	mctx->hash[1] = MD5_H1;
+	mctx->hash[2] = MD5_H2;
+	mctx->hash[3] = MD5_H3;
 	mctx->byte_count = 0;
 
 	return 0;

+ 1 - 6
crypto/pcompress.c

@@ -38,11 +38,6 @@ static int crypto_pcomp_init(struct crypto_tfm *tfm, u32 type, u32 mask)
 	return 0;
 }
 
-static unsigned int crypto_pcomp_extsize(struct crypto_alg *alg)
-{
-	return alg->cra_ctxsize;
-}
-
 static int crypto_pcomp_init_tfm(struct crypto_tfm *tfm)
 {
 	return 0;
@@ -77,7 +72,7 @@ static void crypto_pcomp_show(struct seq_file *m, struct crypto_alg *alg)
 }
 
 static const struct crypto_type crypto_pcomp_type = {
-	.extsize	= crypto_pcomp_extsize,
+	.extsize	= crypto_alg_extsize,
 	.init		= crypto_pcomp_init,
 	.init_tfm	= crypto_pcomp_init_tfm,
 #ifdef CONFIG_PROC_FS

+ 72 - 127
crypto/pcrypt.c

@@ -20,6 +20,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
+#include <linux/atomic.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -60,8 +61,8 @@ static struct padata_pcrypt pdecrypt;
 static struct kset           *pcrypt_kset;
 
 struct pcrypt_instance_ctx {
-	struct crypto_spawn spawn;
-	unsigned int tfm_count;
+	struct crypto_aead_spawn spawn;
+	atomic_t tfm_count;
 };
 
 struct pcrypt_aead_ctx {
@@ -122,14 +123,6 @@ static void pcrypt_aead_serial(struct padata_priv *padata)
 	aead_request_complete(req->base.data, padata->info);
 }
 
-static void pcrypt_aead_giv_serial(struct padata_priv *padata)
-{
-	struct pcrypt_request *preq = pcrypt_padata_request(padata);
-	struct aead_givcrypt_request *req = pcrypt_request_ctx(preq);
-
-	aead_request_complete(req->areq.base.data, padata->info);
-}
-
 static void pcrypt_aead_done(struct crypto_async_request *areq, int err)
 {
 	struct aead_request *req = areq->data;
@@ -175,7 +168,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
 				  pcrypt_aead_done, req);
 	aead_request_set_crypt(creq, req->src, req->dst,
 			       req->cryptlen, req->iv);
-	aead_request_set_assoc(creq, req->assoc, req->assoclen);
+	aead_request_set_ad(creq, req->assoclen);
 
 	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
 	if (!err)
@@ -217,7 +210,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 				  pcrypt_aead_done, req);
 	aead_request_set_crypt(creq, req->src, req->dst,
 			       req->cryptlen, req->iv);
-	aead_request_set_assoc(creq, req->assoc, req->assoclen);
+	aead_request_set_ad(creq, req->assoclen);
 
 	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pdecrypt);
 	if (!err)
@@ -226,182 +219,134 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 	return err;
 }
 
-static void pcrypt_aead_givenc(struct padata_priv *padata)
-{
-	struct pcrypt_request *preq = pcrypt_padata_request(padata);
-	struct aead_givcrypt_request *req = pcrypt_request_ctx(preq);
-
-	padata->info = crypto_aead_givencrypt(req);
-
-	if (padata->info == -EINPROGRESS)
-		return;
-
-	padata_do_serial(padata);
-}
-
-static int pcrypt_aead_givencrypt(struct aead_givcrypt_request *req)
-{
-	int err;
-	struct aead_request *areq = &req->areq;
-	struct pcrypt_request *preq = aead_request_ctx(areq);
-	struct aead_givcrypt_request *creq = pcrypt_request_ctx(preq);
-	struct padata_priv *padata = pcrypt_request_padata(preq);
-	struct crypto_aead *aead = aead_givcrypt_reqtfm(req);
-	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead);
-	u32 flags = aead_request_flags(areq);
-
-	memset(padata, 0, sizeof(struct padata_priv));
-
-	padata->parallel = pcrypt_aead_givenc;
-	padata->serial = pcrypt_aead_giv_serial;
-
-	aead_givcrypt_set_tfm(creq, ctx->child);
-	aead_givcrypt_set_callback(creq, flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-				   pcrypt_aead_done, areq);
-	aead_givcrypt_set_crypt(creq, areq->src, areq->dst,
-				areq->cryptlen, areq->iv);
-	aead_givcrypt_set_assoc(creq, areq->assoc, areq->assoclen);
-	aead_givcrypt_set_giv(creq, req->giv, req->seq);
-
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
-	if (!err)
-		return -EINPROGRESS;
-
-	return err;
-}
-
-static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm)
+static int pcrypt_aead_init_tfm(struct crypto_aead *tfm)
 {
 	int cpu, cpu_index;
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct pcrypt_instance_ctx *ictx = crypto_instance_ctx(inst);
-	struct pcrypt_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aead_instance *inst = aead_alg_instance(tfm);
+	struct pcrypt_instance_ctx *ictx = aead_instance_ctx(inst);
+	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_aead *cipher;
 
-	ictx->tfm_count++;
-
-	cpu_index = ictx->tfm_count % cpumask_weight(cpu_online_mask);
+	cpu_index = (unsigned int)atomic_inc_return(&ictx->tfm_count) %
+		    cpumask_weight(cpu_online_mask);
 
 	ctx->cb_cpu = cpumask_first(cpu_online_mask);
 	for (cpu = 0; cpu < cpu_index; cpu++)
 		ctx->cb_cpu = cpumask_next(ctx->cb_cpu, cpu_online_mask);
 
-	cipher = crypto_spawn_aead(crypto_instance_ctx(inst));
+	cipher = crypto_spawn_aead(&ictx->spawn);
 
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
 	ctx->child = cipher;
-	tfm->crt_aead.reqsize = sizeof(struct pcrypt_request)
-		+ sizeof(struct aead_givcrypt_request)
-		+ crypto_aead_reqsize(cipher);
+	crypto_aead_set_reqsize(tfm, sizeof(struct pcrypt_request) +
+				     sizeof(struct aead_request) +
+				     crypto_aead_reqsize(cipher));
 
 	return 0;
 }
 
-static void pcrypt_aead_exit_tfm(struct crypto_tfm *tfm)
+static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
 {
-	struct pcrypt_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
 }
 
-static struct crypto_instance *pcrypt_alloc_instance(struct crypto_alg *alg)
+static int pcrypt_init_instance(struct crypto_instance *inst,
+				struct crypto_alg *alg)
 {
-	struct crypto_instance *inst;
-	struct pcrypt_instance_ctx *ctx;
-	int err;
-
-	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
-	if (!inst) {
-		inst = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-
-	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "pcrypt(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_free_inst;
+		return -ENAMETOOLONG;
 
 	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
 
-	ctx = crypto_instance_ctx(inst);
-	err = crypto_init_spawn(&ctx->spawn, alg, inst,
-				CRYPTO_ALG_TYPE_MASK);
-	if (err)
-		goto out_free_inst;
-
 	inst->alg.cra_priority = alg->cra_priority + 100;
 	inst->alg.cra_blocksize = alg->cra_blocksize;
 	inst->alg.cra_alignmask = alg->cra_alignmask;
 
-out:
-	return inst;
-
-out_free_inst:
-	kfree(inst);
-	inst = ERR_PTR(err);
-	goto out;
+	return 0;
 }
 
-static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb,
-						 u32 type, u32 mask)
+static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
+			      u32 type, u32 mask)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct pcrypt_instance_ctx *ctx;
+	struct aead_instance *inst;
+	struct aead_alg *alg;
+	const char *name;
+	int err;
+
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = aead_instance_ctx(inst);
+	crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst));
+
+	err = crypto_grab_aead(&ctx->spawn, name, 0, 0);
+	if (err)
+		goto out_free_inst;
 
-	alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK));
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+	alg = crypto_spawn_aead_alg(&ctx->spawn);
+	err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base);
+	if (err)
+		goto out_drop_aead;
 
-	inst = pcrypt_alloc_instance(alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
+	inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
-	inst->alg.cra_type = &crypto_aead_type;
+	inst->alg.base.cra_ctxsize = sizeof(struct pcrypt_aead_ctx);
 
-	inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
-	inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
-	inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
+	inst->alg.init = pcrypt_aead_init_tfm;
+	inst->alg.exit = pcrypt_aead_exit_tfm;
 
-	inst->alg.cra_ctxsize = sizeof(struct pcrypt_aead_ctx);
+	inst->alg.setkey = pcrypt_aead_setkey;
+	inst->alg.setauthsize = pcrypt_aead_setauthsize;
+	inst->alg.encrypt = pcrypt_aead_encrypt;
+	inst->alg.decrypt = pcrypt_aead_decrypt;
 
-	inst->alg.cra_init = pcrypt_aead_init_tfm;
-	inst->alg.cra_exit = pcrypt_aead_exit_tfm;
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_aead;
 
-	inst->alg.cra_aead.setkey = pcrypt_aead_setkey;
-	inst->alg.cra_aead.setauthsize = pcrypt_aead_setauthsize;
-	inst->alg.cra_aead.encrypt = pcrypt_aead_encrypt;
-	inst->alg.cra_aead.decrypt = pcrypt_aead_decrypt;
-	inst->alg.cra_aead.givencrypt = pcrypt_aead_givencrypt;
+out:
+	return err;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
+out_drop_aead:
+	crypto_drop_aead(&ctx->spawn);
+out_free_inst:
+	kfree(inst);
+	goto out;
 }
 
-static struct crypto_instance *pcrypt_alloc(struct rtattr **tb)
+static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
-		return ERR_CAST(algt);
+		return PTR_ERR(algt);
 
 	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_AEAD:
-		return pcrypt_alloc_aead(tb, algt->type, algt->mask);
+		return pcrypt_create_aead(tmpl, tb, algt->type, algt->mask);
 	}
 
-	return ERR_PTR(-EINVAL);
+	return -EINVAL;
 }
 
 static void pcrypt_free(struct crypto_instance *inst)
 {
 	struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
 
-	crypto_drop_spawn(&ctx->spawn);
+	crypto_drop_aead(&ctx->spawn);
 	kfree(inst);
 }
 
@@ -516,7 +461,7 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
 
 static struct crypto_template pcrypt_tmpl = {
 	.name = "pcrypt",
-	.alloc = pcrypt_alloc,
+	.create = pcrypt_create,
 	.free = pcrypt_free,
 	.module = THIS_MODULE,
 };
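
The conversion above follows the new AEAD calling convention throughout: the
associated data sits in front of the text in a single scatterlist and only its
length is passed via aead_request_set_ad(). A hedged kernel-context sketch of
a caller under the new interface -- "gcm(aes)" and the flat buffer layout are
assumptions for illustration, and real code must supply a completion callback
for asynchronous implementations:

#include <crypto/aead.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int aead_encrypt_one(u8 *buf, unsigned int assoclen,
			    unsigned int cryptlen, u8 *iv,
			    const u8 *key, unsigned int keylen)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* one SG list covering AD || plaintext; the tag is appended */
	sg_init_one(&sg, buf, assoclen + cryptlen + crypto_aead_authsize(tfm));
	aead_request_set_callback(req, 0, NULL, NULL);	/* sync use only */
	aead_request_set_crypt(req, &sg, &sg, cryptlen, iv);
	aead_request_set_ad(req, assoclen);

	err = crypto_aead_encrypt(req);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}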

+ 321 - 0
crypto/poly1305_generic.c

@@ -0,0 +1,321 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#define POLY1305_BLOCK_SIZE	16
+#define POLY1305_KEY_SIZE	32
+#define POLY1305_DIGEST_SIZE	16
+
+struct poly1305_desc_ctx {
+	/* key */
+	u32 r[5];
+	/* finalize key */
+	u32 s[4];
+	/* accumulator */
+	u32 h[5];
+	/* partial buffer */
+	u8 buf[POLY1305_BLOCK_SIZE];
+	/* bytes used in partial buffer */
+	unsigned int buflen;
+	/* r key has been set */
+	bool rset;
+	/* s key has been set */
+	bool sset;
+};
+
+static inline u64 mlt(u64 a, u64 b)
+{
+	return a * b;
+}
+
+static inline u32 sr(u64 v, u_char n)
+{
+	return v >> n;
+}
+
+static inline u32 and(u32 v, u32 mask)
+{
+	return v & mask;
+}
+
+static inline u32 le32_to_cpuvp(const void *p)
+{
+	return le32_to_cpup(p);
+}
+
+static int poly1305_init(struct shash_desc *desc)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx->h, 0, sizeof(dctx->h));
+	dctx->buflen = 0;
+	dctx->rset = false;
+	dctx->sset = false;
+
+	return 0;
+}
+
+static int poly1305_setkey(struct crypto_shash *tfm,
+			   const u8 *key, unsigned int keylen)
+{
+	/* Poly1305 requires a unique key for each tag, which implies that
+	 * we can't set it on the tfm that gets accessed by multiple users
+	 * simultaneously. Instead we expect the key as the first 32 bytes in
+	 * the update() call. */
+	return -ENOTSUPP;
+}
+
+static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	/* r &= 0x0ffffffc0ffffffc0ffffffc0fffffff */
+	dctx->r[0] = (le32_to_cpuvp(key +  0) >> 0) & 0x3ffffff;
+	dctx->r[1] = (le32_to_cpuvp(key +  3) >> 2) & 0x3ffff03;
+	dctx->r[2] = (le32_to_cpuvp(key +  6) >> 4) & 0x3ffc0ff;
+	dctx->r[3] = (le32_to_cpuvp(key +  9) >> 6) & 0x3f03fff;
+	dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
+}
+
+static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+	dctx->s[0] = le32_to_cpuvp(key +  0);
+	dctx->s[1] = le32_to_cpuvp(key +  4);
+	dctx->s[2] = le32_to_cpuvp(key +  8);
+	dctx->s[3] = le32_to_cpuvp(key + 12);
+}
+
+static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
+				    const u8 *src, unsigned int srclen,
+				    u32 hibit)
+{
+	u32 r0, r1, r2, r3, r4;
+	u32 s1, s2, s3, s4;
+	u32 h0, h1, h2, h3, h4;
+	u64 d0, d1, d2, d3, d4;
+
+	if (unlikely(!dctx->sset)) {
+		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
+			poly1305_setrkey(dctx, src);
+			src += POLY1305_BLOCK_SIZE;
+			srclen -= POLY1305_BLOCK_SIZE;
+			dctx->rset = true;
+		}
+		if (srclen >= POLY1305_BLOCK_SIZE) {
+			poly1305_setskey(dctx, src);
+			src += POLY1305_BLOCK_SIZE;
+			srclen -= POLY1305_BLOCK_SIZE;
+			dctx->sset = true;
+		}
+	}
+
+	r0 = dctx->r[0];
+	r1 = dctx->r[1];
+	r2 = dctx->r[2];
+	r3 = dctx->r[3];
+	r4 = dctx->r[4];
+
+	s1 = r1 * 5;
+	s2 = r2 * 5;
+	s3 = r3 * 5;
+	s4 = r4 * 5;
+
+	h0 = dctx->h[0];
+	h1 = dctx->h[1];
+	h2 = dctx->h[2];
+	h3 = dctx->h[3];
+	h4 = dctx->h[4];
+
+	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+
+		/* h += m[i] */
+		h0 += (le32_to_cpuvp(src +  0) >> 0) & 0x3ffffff;
+		h1 += (le32_to_cpuvp(src +  3) >> 2) & 0x3ffffff;
+		h2 += (le32_to_cpuvp(src +  6) >> 4) & 0x3ffffff;
+		h3 += (le32_to_cpuvp(src +  9) >> 6) & 0x3ffffff;
+		h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+
+		/* h *= r */
+		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
+		     mlt(h3, s2) + mlt(h4, s1);
+		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
+		     mlt(h3, s3) + mlt(h4, s2);
+		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
+		     mlt(h3, s4) + mlt(h4, s3);
+		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
+		     mlt(h3, r0) + mlt(h4, s4);
+		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
+		     mlt(h3, r1) + mlt(h4, r0);
+
+		/* (partial) h %= p */
+		d1 += sr(d0, 26);     h0 = and(d0, 0x3ffffff);
+		d2 += sr(d1, 26);     h1 = and(d1, 0x3ffffff);
+		d3 += sr(d2, 26);     h2 = and(d2, 0x3ffffff);
+		d4 += sr(d3, 26);     h3 = and(d3, 0x3ffffff);
+		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
+		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
+
+		src += POLY1305_BLOCK_SIZE;
+		srclen -= POLY1305_BLOCK_SIZE;
+	}
+
+	dctx->h[0] = h0;
+	dctx->h[1] = h1;
+	dctx->h[2] = h2;
+	dctx->h[3] = h3;
+	dctx->h[4] = h4;
+
+	return srclen;
+}
+
+static int poly1305_update(struct shash_desc *desc,
+			   const u8 *src, unsigned int srclen)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int bytes;
+
+	if (unlikely(dctx->buflen)) {
+		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+		memcpy(dctx->buf + dctx->buflen, src, bytes);
+		src += bytes;
+		srclen -= bytes;
+		dctx->buflen += bytes;
+
+		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+			poly1305_blocks(dctx, dctx->buf,
+					POLY1305_BLOCK_SIZE, 1 << 24);
+			dctx->buflen = 0;
+		}
+	}
+
+	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+		bytes = poly1305_blocks(dctx, src, srclen, 1 << 24);
+		src += srclen - bytes;
+		srclen = bytes;
+	}
+
+	if (unlikely(srclen)) {
+		dctx->buflen = srclen;
+		memcpy(dctx->buf, src, srclen);
+	}
+
+	return 0;
+}
+
+static int poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+	__le32 *mac = (__le32 *)dst;
+	u32 h0, h1, h2, h3, h4;
+	u32 g0, g1, g2, g3, g4;
+	u32 mask;
+	u64 f = 0;
+
+	if (unlikely(!dctx->sset))
+		return -ENOKEY;
+
+	if (unlikely(dctx->buflen)) {
+		dctx->buf[dctx->buflen++] = 1;
+		memset(dctx->buf + dctx->buflen, 0,
+		       POLY1305_BLOCK_SIZE - dctx->buflen);
+		poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+	}
+
+	/* fully carry h */
+	h0 = dctx->h[0];
+	h1 = dctx->h[1];
+	h2 = dctx->h[2];
+	h3 = dctx->h[3];
+	h4 = dctx->h[4];
+
+	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
+	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
+	h4 += (h3 >> 26);     h3 = h3 & 0x3ffffff;
+	h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
+	h1 += (h0 >> 26);     h0 = h0 & 0x3ffffff;
+
+	/* compute h + -p */
+	g0 = h0 + 5;
+	g1 = h1 + (g0 >> 26);             g0 &= 0x3ffffff;
+	g2 = h2 + (g1 >> 26);             g1 &= 0x3ffffff;
+	g3 = h3 + (g2 >> 26);             g2 &= 0x3ffffff;
+	g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
+
+	/* select h if h < p, or h + -p if h >= p */
+	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
+	g0 &= mask;
+	g1 &= mask;
+	g2 &= mask;
+	g3 &= mask;
+	g4 &= mask;
+	mask = ~mask;
+	h0 = (h0 & mask) | g0;
+	h1 = (h1 & mask) | g1;
+	h2 = (h2 & mask) | g2;
+	h3 = (h3 & mask) | g3;
+	h4 = (h4 & mask) | g4;
+
+	/* h = h % (2^128) */
+	h0 = (h0 >>  0) | (h1 << 26);
+	h1 = (h1 >>  6) | (h2 << 20);
+	h2 = (h2 >> 12) | (h3 << 14);
+	h3 = (h3 >> 18) | (h4 <<  8);
+
+	/* mac = (h + s) % (2^128) */
+	f = (f >> 32) + h0 + dctx->s[0]; mac[0] = cpu_to_le32(f);
+	f = (f >> 32) + h1 + dctx->s[1]; mac[1] = cpu_to_le32(f);
+	f = (f >> 32) + h2 + dctx->s[2]; mac[2] = cpu_to_le32(f);
+	f = (f >> 32) + h3 + dctx->s[3]; mac[3] = cpu_to_le32(f);
+
+	return 0;
+}
+
+static struct shash_alg poly1305_alg = {
+	.digestsize	= POLY1305_DIGEST_SIZE,
+	.init		= poly1305_init,
+	.update		= poly1305_update,
+	.final		= poly1305_final,
+	.setkey		= poly1305_setkey,
+	.descsize	= sizeof(struct poly1305_desc_ctx),
+	.base		= {
+		.cra_name		= "poly1305",
+		.cra_driver_name	= "poly1305-generic",
+		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_alignmask		= sizeof(u32) - 1,
+		.cra_blocksize		= POLY1305_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+static int __init poly1305_mod_init(void)
+{
+	return crypto_register_shash(&poly1305_alg);
+}
+
+static void __exit poly1305_mod_exit(void)
+{
+	crypto_unregister_shash(&poly1305_alg);
+}
+
+module_init(poly1305_mod_init);
+module_exit(poly1305_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("Poly1305 authenticator");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-generic");

+ 0 - 41
crypto/proc.c

@@ -20,47 +20,8 @@
 #include <linux/rwsem.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/sysctl.h>
 #include "internal.h"
 
-#ifdef CONFIG_CRYPTO_FIPS
-static struct ctl_table crypto_sysctl_table[] = {
-	{
-		.procname       = "fips_enabled",
-		.data           = &fips_enabled,
-		.maxlen         = sizeof(int),
-		.mode           = 0444,
-		.proc_handler   = proc_dointvec
-	},
-	{}
-};
-
-static struct ctl_table crypto_dir_table[] = {
-	{
-		.procname       = "crypto",
-		.mode           = 0555,
-		.child          = crypto_sysctl_table
-	},
-	{}
-};
-
-static struct ctl_table_header *crypto_sysctls;
-
-static void crypto_proc_fips_init(void)
-{
-	crypto_sysctls = register_sysctl_table(crypto_dir_table);
-}
-
-static void crypto_proc_fips_exit(void)
-{
-	if (crypto_sysctls)
-		unregister_sysctl_table(crypto_sysctls);
-}
-#else
-#define crypto_proc_fips_init()
-#define crypto_proc_fips_exit()
-#endif
-
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
 	down_read(&crypto_alg_sem);
@@ -148,11 +109,9 @@ static const struct file_operations proc_crypto_ops = {
 void __init crypto_init_proc(void)
 {
 	proc_create("crypto", 0, NULL, &proc_crypto_ops);
-	crypto_proc_fips_init();
 }
 
 void __exit crypto_exit_proc(void)
 {
-	crypto_proc_fips_exit();
 	remove_proc_entry("crypto", NULL);
 }

+ 107 - 25
crypto/rng.c

@@ -4,6 +4,7 @@
  * RNG operations.
  *
  * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com>
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -24,12 +25,19 @@
 #include <linux/cryptouser.h>
 #include <net/netlink.h>
 
+#include "internal.h"
+
 static DEFINE_MUTEX(crypto_default_rng_lock);
 struct crypto_rng *crypto_default_rng;
 EXPORT_SYMBOL_GPL(crypto_default_rng);
 static int crypto_default_rng_refcnt;
 
-static int rngapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_rng, base);
+}
+
+int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 {
 	u8 *buf = NULL;
 	int err;
@@ -43,21 +51,23 @@ static int rngapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
 		seed = buf;
 	}
 
-	err = crypto_rng_alg(tfm)->rng_reset(tfm, seed, slen);
+	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
 
-	kfree(buf);
+	kzfree(buf);
 	return err;
 }
+EXPORT_SYMBOL_GPL(crypto_rng_reset);
 
-static int crypto_init_rng_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+static int crypto_rng_init_tfm(struct crypto_tfm *tfm)
 {
-	struct rng_alg *alg = &tfm->__crt_alg->cra_rng;
-	struct rng_tfm *ops = &tfm->crt_rng;
+	return 0;
+}
 
-	ops->rng_gen_random = alg->rng_make_random;
-	ops->rng_reset = rngapi_reset;
+static unsigned int seedsize(struct crypto_alg *alg)
+{
+	struct rng_alg *ralg = container_of(alg, struct rng_alg, base);
 
-	return 0;
+	return ralg->seedsize;
 }
 
 #ifdef CONFIG_NET
@@ -67,7 +77,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
 
 	strncpy(rrng.type, "rng", sizeof(rrng.type));
 
-	rrng.seedsize = alg->cra_rng.seedsize;
+	rrng.seedsize = seedsize(alg);
 
 	if (nla_put(skb, CRYPTOCFGA_REPORT_RNG,
 		    sizeof(struct crypto_report_rng), &rrng))
@@ -89,24 +99,27 @@ static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
 static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
 {
 	seq_printf(m, "type         : rng\n");
-	seq_printf(m, "seedsize     : %u\n", alg->cra_rng.seedsize);
-}
-
-static unsigned int crypto_rng_ctxsize(struct crypto_alg *alg, u32 type,
-				       u32 mask)
-{
-	return alg->cra_ctxsize;
+	seq_printf(m, "seedsize     : %u\n", seedsize(alg));
 }
 
-const struct crypto_type crypto_rng_type = {
-	.ctxsize = crypto_rng_ctxsize,
-	.init = crypto_init_rng_ops,
+static const struct crypto_type crypto_rng_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_rng_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_rng_show,
 #endif
 	.report = crypto_rng_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_RNG,
+	.tfmsize = offsetof(struct crypto_rng, base),
 };
-EXPORT_SYMBOL_GPL(crypto_rng_type);
+
+struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_rng_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_rng);
 
 int crypto_get_default_rng(void)
 {
@@ -142,13 +155,82 @@ EXPORT_SYMBOL_GPL(crypto_get_default_rng);
 void crypto_put_default_rng(void)
 {
 	mutex_lock(&crypto_default_rng_lock);
-	if (!--crypto_default_rng_refcnt) {
-		crypto_free_rng(crypto_default_rng);
-		crypto_default_rng = NULL;
-	}
+	crypto_default_rng_refcnt--;
 	mutex_unlock(&crypto_default_rng_lock);
 }
 EXPORT_SYMBOL_GPL(crypto_put_default_rng);
 
+#if defined(CONFIG_CRYPTO_RNG) || defined(CONFIG_CRYPTO_RNG_MODULE)
+int crypto_del_default_rng(void)
+{
+	int err = -EBUSY;
+
+	mutex_lock(&crypto_default_rng_lock);
+	if (crypto_default_rng_refcnt)
+		goto out;
+
+	crypto_free_rng(crypto_default_rng);
+	crypto_default_rng = NULL;
+
+	err = 0;
+
+out:
+	mutex_unlock(&crypto_default_rng_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_del_default_rng);
+#endif
+
+int crypto_register_rng(struct rng_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+
+	if (alg->seedsize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	base->cra_type = &crypto_rng_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_RNG;
+
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_rng);
+
+void crypto_unregister_rng(struct rng_alg *alg)
+{
+	crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_rng);
+
+int crypto_register_rngs(struct rng_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_rng(algs + i);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (--i; i >= 0; --i)
+		crypto_unregister_rng(algs + i);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_rngs);
+
+void crypto_unregister_rngs(struct rng_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_rng(algs + i);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_rngs);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Random Number Generator");

+ 315 - 0
crypto/rsa.c

@@ -0,0 +1,315 @@
+/* RSA asymmetric public-key algorithm [RFC3447]
+ *
+ * Copyright (c) 2015, Intel Corporation
+ * Authors: Tadeusz Struk <tadeusz.struk@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+
+/*
+ * RSAEP function [RFC3447 sec 5.1.1]
+ * c = m^e mod n;
+ */
+static int _rsa_enc(const struct rsa_key *key, MPI c, MPI m)
+{
+	/* (1) Validate 0 <= m < n */
+	if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0)
+		return -EINVAL;
+
+	/* (2) c = m^e mod n */
+	return mpi_powm(c, m, key->e, key->n);
+}
+
+/*
+ * RSADP function [RFC3447 sec 5.1.2]
+ * m = c^d mod n;
+ */
+static int _rsa_dec(const struct rsa_key *key, MPI m, MPI c)
+{
+	/* (1) Validate 0 <= c < n */
+	if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0)
+		return -EINVAL;
+
+	/* (2) m = c^d mod n */
+	return mpi_powm(m, c, key->d, key->n);
+}
+
+/*
+ * RSASP1 function [RFC3447 sec 5.2.1]
+ * s = m^d mod n
+ */
+static int _rsa_sign(const struct rsa_key *key, MPI s, MPI m)
+{
+	/* (1) Validate 0 <= m < n */
+	if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0)
+		return -EINVAL;
+
+	/* (2) s = m^d mod n */
+	return mpi_powm(s, m, key->d, key->n);
+}
+
+/*
+ * RSAVP1 function [RFC3447 sec 5.2.2]
+ * m = s^e mod n;
+ */
+static int _rsa_verify(const struct rsa_key *key, MPI m, MPI s)
+{
+	/* (1) Validate 0 <= s < n */
+	if (mpi_cmp_ui(s, 0) < 0 || mpi_cmp(s, key->n) >= 0)
+		return -EINVAL;
+
+	/* (2) m = s^e mod n */
+	return mpi_powm(m, s, key->e, key->n);
+}
+
+static inline struct rsa_key *rsa_get_key(struct crypto_akcipher *tfm)
+{
+	return akcipher_tfm_ctx(tfm);
+}
+
+static int rsa_enc(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	const struct rsa_key *pkey = rsa_get_key(tfm);
+	MPI m, c = mpi_alloc(0);
+	int ret = 0;
+	int sign;
+
+	if (!c)
+		return -ENOMEM;
+
+	if (unlikely(!pkey->n || !pkey->e)) {
+		ret = -EINVAL;
+		goto err_free_c;
+	}
+
+	if (req->dst_len < mpi_get_size(pkey->n)) {
+		req->dst_len = mpi_get_size(pkey->n);
+		ret = -EOVERFLOW;
+		goto err_free_c;
+	}
+
+	m = mpi_read_raw_data(req->src, req->src_len);
+	if (!m) {
+		ret = -ENOMEM;
+		goto err_free_c;
+	}
+
+	ret = _rsa_enc(pkey, c, m);
+	if (ret)
+		goto err_free_m;
+
+	ret = mpi_read_buffer(c, req->dst, req->dst_len, &req->dst_len, &sign);
+	if (ret)
+		goto err_free_m;
+
+	if (sign < 0) {
+		ret = -EBADMSG;
+		goto err_free_m;
+	}
+
+err_free_m:
+	mpi_free(m);
+err_free_c:
+	mpi_free(c);
+	return ret;
+}
+
+static int rsa_dec(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	const struct rsa_key *pkey = rsa_get_key(tfm);
+	MPI c, m = mpi_alloc(0);
+	int ret = 0;
+	int sign;
+
+	if (!m)
+		return -ENOMEM;
+
+	if (unlikely(!pkey->n || !pkey->d)) {
+		ret = -EINVAL;
+		goto err_free_m;
+	}
+
+	if (req->dst_len < mpi_get_size(pkey->n)) {
+		req->dst_len = mpi_get_size(pkey->n);
+		ret = -EOVERFLOW;
+		goto err_free_m;
+	}
+
+	c = mpi_read_raw_data(req->src, req->src_len);
+	if (!c) {
+		ret = -ENOMEM;
+		goto err_free_m;
+	}
+
+	ret = _rsa_dec(pkey, m, c);
+	if (ret)
+		goto err_free_c;
+
+	ret = mpi_read_buffer(m, req->dst, req->dst_len, &req->dst_len, &sign);
+	if (ret)
+		goto err_free_c;
+
+	if (sign < 0) {
+		ret = -EBADMSG;
+		goto err_free_c;
+	}
+
+err_free_c:
+	mpi_free(c);
+err_free_m:
+	mpi_free(m);
+	return ret;
+}
+
+static int rsa_sign(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	const struct rsa_key *pkey = rsa_get_key(tfm);
+	MPI m, s = mpi_alloc(0);
+	int ret = 0;
+	int sign;
+
+	if (!s)
+		return -ENOMEM;
+
+	if (unlikely(!pkey->n || !pkey->d)) {
+		ret = -EINVAL;
+		goto err_free_s;
+	}
+
+	if (req->dst_len < mpi_get_size(pkey->n)) {
+		req->dst_len = mpi_get_size(pkey->n);
+		ret = -EOVERFLOW;
+		goto err_free_s;
+	}
+
+	m = mpi_read_raw_data(req->src, req->src_len);
+	if (!m) {
+		ret = -ENOMEM;
+		goto err_free_s;
+	}
+
+	ret = _rsa_sign(pkey, s, m);
+	if (ret)
+		goto err_free_m;
+
+	ret = mpi_read_buffer(s, req->dst, req->dst_len, &req->dst_len, &sign);
+	if (ret)
+		goto err_free_m;
+
+	if (sign < 0) {
+		ret = -EBADMSG;
+		goto err_free_m;
+	}
+
+err_free_m:
+	mpi_free(m);
+err_free_s:
+	mpi_free(s);
+	return ret;
+}
+
+static int rsa_verify(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	const struct rsa_key *pkey = rsa_get_key(tfm);
+	MPI s, m = mpi_alloc(0);
+	int ret = 0;
+	int sign;
+
+	if (!m)
+		return -ENOMEM;
+
+	if (unlikely(!pkey->n || !pkey->e)) {
+		ret = -EINVAL;
+		goto err_free_m;
+	}
+
+	if (req->dst_len < mpi_get_size(pkey->n)) {
+		req->dst_len = mpi_get_size(pkey->n);
+		ret = -EOVERFLOW;
+		goto err_free_m;
+	}
+
+	s = mpi_read_raw_data(req->src, req->src_len);
+	if (!s) {
+		ret = -ENOMEM;
+		goto err_free_m;
+	}
+
+	ret = _rsa_verify(pkey, m, s);
+	if (ret)
+		goto err_free_s;
+
+	ret = mpi_read_buffer(m, req->dst, req->dst_len, &req->dst_len, &sign);
+	if (ret)
+		goto err_free_s;
+
+	if (sign < 0) {
+		ret = -EBADMSG;
+		goto err_free_s;
+	}
+
+err_free_s:
+	mpi_free(s);
+err_free_m:
+	mpi_free(m);
+	return ret;
+}
+
+static int rsa_setkey(struct crypto_akcipher *tfm, const void *key,
+		      unsigned int keylen)
+{
+	struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
+
+	return rsa_parse_key(pkey, key, keylen);
+}
+
+static void rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+	struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
+
+	rsa_free_key(pkey);
+}
+
+static struct akcipher_alg rsa = {
+	.encrypt = rsa_enc,
+	.decrypt = rsa_dec,
+	.sign = rsa_sign,
+	.verify = rsa_verify,
+	.setkey = rsa_setkey,
+	.exit = rsa_exit_tfm,
+	.base = {
+		.cra_name = "rsa",
+		.cra_driver_name = "rsa-generic",
+		.cra_priority = 100,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = sizeof(struct rsa_key),
+	},
+};
+
+static int rsa_init(void)
+{
+	return crypto_register_akcipher(&rsa);
+}
+
+static void rsa_exit(void)
+{
+	crypto_unregister_akcipher(&rsa);
+}
+
+module_init(rsa_init);
+module_exit(rsa_exit);
+MODULE_ALIAS_CRYPTO("rsa");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RSA generic algorithm");

+ 121 - 0
crypto/rsa_helper.c

@@ -0,0 +1,121 @@
+/*
+ * RSA key extract helper
+ *
+ * Copyright (c) 2015, Intel Corporation
+ * Authors: Tadeusz Struk <tadeusz.struk@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fips.h>
+#include <crypto/internal/rsa.h>
+#include "rsakey-asn1.h"
+
+int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
+	      const void *value, size_t vlen)
+{
+	struct rsa_key *key = context;
+
+	key->n = mpi_read_raw_data(value, vlen);
+
+	if (!key->n)
+		return -ENOMEM;
+
+	/* In FIPS mode only allow key size 2K & 3K */
+	if (fips_enabled && (mpi_get_size(key->n) != 256 &&
+			     mpi_get_size(key->n) != 384)) {
+		pr_err("RSA: key size not allowed in FIPS mode\n");
+		mpi_free(key->n);
+		key->n = NULL;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int rsa_get_e(void *context, size_t hdrlen, unsigned char tag,
+	      const void *value, size_t vlen)
+{
+	struct rsa_key *key = context;
+
+	key->e = mpi_read_raw_data(value, vlen);
+
+	if (!key->e)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int rsa_get_d(void *context, size_t hdrlen, unsigned char tag,
+	      const void *value, size_t vlen)
+{
+	struct rsa_key *key = context;
+
+	key->d = mpi_read_raw_data(value, vlen);
+
+	if (!key->d)
+		return -ENOMEM;
+
+	/* In FIPS mode only allow key size 2K & 3K */
+	if (fips_enabled && (mpi_get_size(key->d) != 256 &&
+			     mpi_get_size(key->d) != 384)) {
+		pr_err("RSA: key size not allowed in FIPS mode\n");
+		mpi_free(key->d);
+		key->d = NULL;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void free_mpis(struct rsa_key *key)
+{
+	mpi_free(key->n);
+	mpi_free(key->e);
+	mpi_free(key->d);
+	key->n = NULL;
+	key->e = NULL;
+	key->d = NULL;
+}
+
+/**
+ * rsa_free_key() - frees rsa key allocated by rsa_parse_key()
+ *
+ * @rsa_key:	struct rsa_key key representation
+ */
+void rsa_free_key(struct rsa_key *key)
+{
+	free_mpis(key);
+}
+EXPORT_SYMBOL_GPL(rsa_free_key);
+
+/**
+ * rsa_parse_key() - extracts an rsa key from BER encoded buffer
+ *		     and stores it in the provided struct rsa_key
+ *
+ * @rsa_key:	struct rsa_key key representation
+ * @key:	key in BER format
+ * @key_len:	length of key
+ *
+ * Return:	0 on success or error code in case of error
+ */
+int rsa_parse_key(struct rsa_key *rsa_key, const void *key,
+		  unsigned int key_len)
+{
+	int ret;
+
+	free_mpis(rsa_key);
+	ret = asn1_ber_decoder(&rsakey_decoder, rsa_key, key, key_len);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+error:
+	free_mpis(rsa_key);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rsa_parse_key);

+ 5 - 0
crypto/rsakey.asn1

@@ -0,0 +1,5 @@
+RsaKey ::= SEQUENCE {
+	n INTEGER ({ rsa_get_n }),
+	e INTEGER ({ rsa_get_e }),
+	d INTEGER ({ rsa_get_d })
+}
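
For illustration, the BER/DER encoding the decoder expects for the toy key
used earlier (n = 3233, e = 17, d = 2753) would be the 13-byte sequence

  30 0b            SEQUENCE, 11 content bytes
     02 02 0c a1   INTEGER n = 0x0ca1 = 3233
     02 01 11      INTEGER e = 0x11   = 17
     02 02 0a c1   INTEGER d = 0x0ac1 = 2753

where each INTEGER value lands in the matching rsa_get_*() callback above.
(The concrete numbers are illustrative; real keys are hundreds of bytes per
integer and use long-form lengths.)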

+ 34 - 11
crypto/scatterwalk.c

@@ -54,7 +54,11 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
 		struct page *page;
 
 		page = sg_page(walk->sg) + ((walk->offset - 1) >> PAGE_SHIFT);
-		if (!PageSlab(page))
+		/* Test ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE first as
+		 * PageSlab cannot be optimised away per se due to
+		 * use of volatile pointer.
+		 */
+		if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE && !PageSlab(page))
 			flush_dcache_page(page);
 	}
 
@@ -104,22 +108,18 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
 			      unsigned int start, unsigned int nbytes, int out)
 {
 	struct scatter_walk walk;
-	unsigned int offset = 0;
+	struct scatterlist tmp[2];
 
 	if (!nbytes)
 		return;
 
-	for (;;) {
-		scatterwalk_start(&walk, sg);
-
-		if (start < offset + sg->length)
-			break;
+	sg = scatterwalk_ffwd(tmp, sg, start);
 
-		offset += sg->length;
-		sg = sg_next(sg);
-	}
+	if (sg_page(sg) == virt_to_page(buf) &&
+	    sg->offset == offset_in_page(buf))
+		return;
 
-	scatterwalk_advance(&walk, start - offset);
+	scatterwalk_start(&walk, sg);
 	scatterwalk_copychunks(buf, &walk, nbytes, out);
 	scatterwalk_done(&walk, out, 0);
 }
@@ -146,3 +146,26 @@ int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes)
 	return n;
 }
 EXPORT_SYMBOL_GPL(scatterwalk_bytes_sglen);
+
+struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
+				     struct scatterlist *src,
+				     unsigned int len)
+{
+	for (;;) {
+		if (!len)
+			return src;
+
+		if (src->length > len)
+			break;
+
+		len -= src->length;
+		src = sg_next(src);
+	}
+
+	sg_init_table(dst, 2);
+	sg_set_page(dst, sg_page(src), src->length - len, src->offset + len);
+	scatterwalk_crypto_chain(dst, sg_next(src), 0, 2);
+
+	return dst;
+}
+EXPORT_SYMBOL_GPL(scatterwalk_ffwd);
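
scatterwalk_ffwd() returns a view that starts in the middle of a chunk without
copying data, splicing the remainder of the source list behind it. A userspace
analogue with plain buffer/length pairs (it assumes the skip count stays
within the list and omits the kernel's chaining of the tail):

#include <stdio.h>

struct chunk {
	const char *buf;
	unsigned int len;
};

static struct chunk ffwd(const struct chunk *src, unsigned int n,
			 unsigned int skip)
{
	unsigned int i = 0;
	struct chunk out;

	while (i < n && skip >= src[i].len)
		skip -= src[i++].len;

	out.buf = src[i].buf + skip;	/* start mid-chunk */
	out.len = src[i].len - skip;
	return out;
}

int main(void)
{
	struct chunk sg[2] = { { "abcdef", 6 }, { "ghij", 4 } };
	struct chunk v = ffwd(sg, 2, 8);

	printf("%.*s\n", (int)v.len, v.buf);	/* prints "ij" */
	return 0;
}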

+ 507 - 80
crypto/seqiv.c

@@ -13,9 +13,11 @@
  *
  */
 
-#include <crypto/internal/aead.h>
+#include <crypto/internal/geniv.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/null.h>
 #include <crypto/rng.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -24,11 +26,25 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
+struct seqniv_request_ctx {
+	struct scatterlist dst[2];
+	struct aead_request subreq;
+};
+
 struct seqiv_ctx {
 	spinlock_t lock;
 	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
 };
 
+struct seqiv_aead_ctx {
+	/* aead_geniv_ctx must be first the element */
+	struct aead_geniv_ctx geniv;
+	struct crypto_blkcipher *null;
+	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
+};
+
+static void seqiv_free(struct crypto_instance *inst);
+
 static void seqiv_complete2(struct skcipher_givcrypt_request *req, int err)
 {
 	struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req);
@@ -81,6 +97,77 @@ static void seqiv_aead_complete(struct crypto_async_request *base, int err)
 	aead_givcrypt_complete(req, err);
 }
 
+static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err)
+{
+	struct aead_request *subreq = aead_request_ctx(req);
+	struct crypto_aead *geniv;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	if (err)
+		goto out;
+
+	geniv = crypto_aead_reqtfm(req);
+	memcpy(req->iv, subreq->iv, crypto_aead_ivsize(geniv));
+
+out:
+	kzfree(subreq->iv);
+}
+
+static void seqiv_aead_encrypt_complete(struct crypto_async_request *base,
+					int err)
+{
+	struct aead_request *req = base->data;
+
+	seqiv_aead_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static void seqniv_aead_encrypt_complete2(struct aead_request *req, int err)
+{
+	unsigned int ivsize = 8;
+	u8 data[20];
+
+	if (err == -EINPROGRESS)
+		return;
+
+	/* Swap IV and ESP header back to correct order. */
+	scatterwalk_map_and_copy(data, req->dst, 0, req->assoclen + ivsize, 0);
+	scatterwalk_map_and_copy(data + ivsize, req->dst, 0, req->assoclen, 1);
+	scatterwalk_map_and_copy(data, req->dst, req->assoclen, ivsize, 1);
+}
+
+static void seqniv_aead_encrypt_complete(struct crypto_async_request *base,
+					int err)
+{
+	struct aead_request *req = base->data;
+
+	seqniv_aead_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static void seqniv_aead_decrypt_complete2(struct aead_request *req, int err)
+{
+	u8 data[4];
+
+	if (err == -EINPROGRESS)
+		return;
+
+	/* Move ESP header back to correct location. */
+	scatterwalk_map_and_copy(data, req->dst, 16, req->assoclen - 8, 0);
+	scatterwalk_map_and_copy(data, req->dst, 8, req->assoclen - 8, 1);
+}
+
+static void seqniv_aead_decrypt_complete(struct crypto_async_request *base,
+					 int err)
+{
+	struct aead_request *req = base->data;
+
+	seqniv_aead_decrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
 static void seqiv_geniv(struct seqiv_ctx *ctx, u8 *info, u64 seq,
 			unsigned int ivsize)
 {
@@ -186,160 +273,477 @@ static int seqiv_aead_givencrypt(struct aead_givcrypt_request *req)
 	return err;
 }
 
-static int seqiv_givencrypt_first(struct skcipher_givcrypt_request *req)
+static int seqniv_aead_encrypt(struct aead_request *req)
 {
-	struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
-	struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv);
-	int err = 0;
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct seqniv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_request *subreq = &rctx->subreq;
+	struct scatterlist *dst;
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = 8;
+	u8 buf[20] __attribute__ ((aligned(__alignof__(u32))));
+	int err;
 
-	spin_lock_bh(&ctx->lock);
-	if (crypto_ablkcipher_crt(geniv)->givencrypt != seqiv_givencrypt_first)
-		goto unlock;
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
 
-	crypto_ablkcipher_crt(geniv)->givencrypt = seqiv_givencrypt;
-	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
-				   crypto_ablkcipher_ivsize(geniv));
+	/* ESP AD is at most 12 bytes (ESN). */
+	if (req->assoclen > 12)
+		return -EINVAL;
 
-unlock:
-	spin_unlock_bh(&ctx->lock);
+	aead_request_set_tfm(subreq, ctx->geniv.child);
 
-	if (err)
-		return err;
+	compl = seqniv_aead_encrypt_complete;
+	data = req;
+
+	if (req->src != req->dst) {
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(&desc, req->dst, req->src,
+					       req->assoclen + req->cryptlen);
+		if (err)
+			return err;
+	}
 
-	return seqiv_givencrypt(req);
+	dst = scatterwalk_ffwd(rctx->dst, req->dst, ivsize);
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, dst, dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	memcpy(buf, req->iv, ivsize);
+	crypto_xor(buf, ctx->salt, ivsize);
+	memcpy(req->iv, buf, ivsize);
+
+	/* Swap order of IV and ESP AD for ICV generation. */
+	scatterwalk_map_and_copy(buf + ivsize, req->dst, 0, req->assoclen, 0);
+	scatterwalk_map_and_copy(buf, req->dst, 0, req->assoclen + ivsize, 1);
+
+	err = crypto_aead_encrypt(subreq);
+	seqniv_aead_encrypt_complete2(req, err);
+	return err;
 }
 
-static int seqiv_aead_givencrypt_first(struct aead_givcrypt_request *req)
+static int seqiv_aead_encrypt(struct aead_request *req)
 {
-	struct crypto_aead *geniv = aead_givcrypt_reqtfm(req);
-	struct seqiv_ctx *ctx = crypto_aead_ctx(geniv);
-	int err = 0;
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	u8 *info;
+	unsigned int ivsize = 8;
+	int err;
 
-	spin_lock_bh(&ctx->lock);
-	if (crypto_aead_crt(geniv)->givencrypt != seqiv_aead_givencrypt_first)
-		goto unlock;
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
 
-	crypto_aead_crt(geniv)->givencrypt = seqiv_aead_givencrypt;
-	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
-				   crypto_aead_ivsize(geniv));
+	aead_request_set_tfm(subreq, ctx->geniv.child);
 
-unlock:
-	spin_unlock_bh(&ctx->lock);
+	compl = req->base.complete;
+	data = req->base.data;
+	info = req->iv;
 
-	if (err)
-		return err;
+	if (req->src != req->dst) {
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
 
-	return seqiv_aead_givencrypt(req);
+		err = crypto_blkcipher_encrypt(&desc, req->dst, req->src,
+					       req->assoclen + req->cryptlen);
+		if (err)
+			return err;
+	}
+
+	if (unlikely(!IS_ALIGNED((unsigned long)info,
+				 crypto_aead_alignmask(geniv) + 1))) {
+		info = kmalloc(ivsize, req->base.flags &
+				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+								  GFP_ATOMIC);
+		if (!info)
+			return -ENOMEM;
+
+		memcpy(info, req->iv, ivsize);
+		compl = seqiv_aead_encrypt_complete;
+		data = req;
+	}
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->dst, req->dst,
+			       req->cryptlen - ivsize, info);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	crypto_xor(info, ctx->salt, ivsize);
+	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+
+	err = crypto_aead_encrypt(subreq);
+	if (unlikely(info != req->iv))
+		seqiv_aead_encrypt_complete2(req, err);
+	return err;
+}
+
+static int seqniv_aead_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct seqniv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_request *subreq = &rctx->subreq;
+	struct scatterlist *dst;
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = 8;
+	u8 buf[20];
+	int err;
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->geniv.child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	if (req->assoclen > 12)
+		return -EINVAL;
+	else if (req->assoclen > 8) {
+		compl = seqniv_aead_decrypt_complete;
+		data = req;
+	}
+
+	if (req->src != req->dst) {
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(&desc, req->dst, req->src,
+					       req->assoclen + req->cryptlen);
+		if (err)
+			return err;
+	}
+
+	/* Move ESP AD forward for ICV generation. */
+	scatterwalk_map_and_copy(buf, req->dst, 0, req->assoclen + ivsize, 0);
+	memcpy(req->iv, buf + req->assoclen, ivsize);
+	scatterwalk_map_and_copy(buf, req->dst, ivsize, req->assoclen, 1);
+
+	dst = scatterwalk_ffwd(rctx->dst, req->dst, ivsize);
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, dst, dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	err = crypto_aead_decrypt(subreq);
+	if (req->assoclen > 8)
+		seqniv_aead_decrypt_complete2(req, err);
+	return err;
+}
+
+static int seqiv_aead_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = 8;
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->geniv.child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
+	if (req->src != req->dst)
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->assoclen, ivsize, 1);
+
+	return crypto_aead_decrypt(subreq);
 }
 
 static int seqiv_init(struct crypto_tfm *tfm)
 {
 	struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm);
 	struct seqiv_ctx *ctx = crypto_ablkcipher_ctx(geniv);
+	int err;
 
 	spin_lock_init(&ctx->lock);
 
 	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request);
 
-	return skcipher_geniv_init(tfm);
+	err = 0;
+	if (!crypto_get_default_rng()) {
+		crypto_ablkcipher_crt(geniv)->givencrypt = seqiv_givencrypt;
+		err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+					   crypto_ablkcipher_ivsize(geniv));
+		crypto_put_default_rng();
+	}
+
+	return err ?: skcipher_geniv_init(tfm);
 }
 
-static int seqiv_aead_init(struct crypto_tfm *tfm)
+static int seqiv_old_aead_init(struct crypto_tfm *tfm)
 {
 	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
 	struct seqiv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
 
 	spin_lock_init(&ctx->lock);
 
-	tfm->crt_aead.reqsize = sizeof(struct aead_request);
+	crypto_aead_set_reqsize(geniv, sizeof(struct aead_request));
+	err = 0;
+	if (!crypto_get_default_rng()) {
+		geniv->givencrypt = seqiv_aead_givencrypt;
+		err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+					   crypto_aead_ivsize(geniv));
+		crypto_put_default_rng();
+	}
 
-	return aead_geniv_init(tfm);
+	return err ?: aead_geniv_init(tfm);
 }
 
-static struct crypto_template seqiv_tmpl;
-
-static struct crypto_instance *seqiv_ablkcipher_alloc(struct rtattr **tb)
+static int seqiv_aead_init_common(struct crypto_tfm *tfm, unsigned int reqsize)
 {
-	struct crypto_instance *inst;
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
 
-	inst = skcipher_geniv_alloc(&seqiv_tmpl, tb, 0, 0);
+	spin_lock_init(&ctx->geniv.lock);
 
-	if (IS_ERR(inst))
+	crypto_aead_set_reqsize(geniv, reqsize);
+
+	err = crypto_get_default_rng();
+	if (err)
 		goto out;
 
-	if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64)) {
-		skcipher_geniv_free(inst);
-		inst = ERR_PTR(-EINVAL);
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(geniv));
+	crypto_put_default_rng();
+	if (err)
 		goto out;
-	}
 
-	inst->alg.cra_ablkcipher.givencrypt = seqiv_givencrypt_first;
+	ctx->null = crypto_get_default_null_skcipher();
+	err = PTR_ERR(ctx->null);
+	if (IS_ERR(ctx->null))
+		goto out;
 
-	inst->alg.cra_init = seqiv_init;
-	inst->alg.cra_exit = skcipher_geniv_exit;
+	err = aead_geniv_init(tfm);
+	if (err)
+		goto drop_null;
 
-	inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize;
+	ctx->geniv.child = geniv->child;
+	geniv->child = geniv;
 
 out:
-	return inst;
+	return err;
+
+drop_null:
+	crypto_put_default_null_skcipher();
+	goto out;
+}
+
+static int seqiv_aead_init(struct crypto_tfm *tfm)
+{
+	return seqiv_aead_init_common(tfm, sizeof(struct aead_request));
+}
+
+static int seqniv_aead_init(struct crypto_tfm *tfm)
+{
+	return seqiv_aead_init_common(tfm, sizeof(struct seqniv_request_ctx));
+}
+
+static void seqiv_aead_exit(struct crypto_tfm *tfm)
+{
+	struct seqiv_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->geniv.child);
+	crypto_put_default_null_skcipher();
 }
 
-static struct crypto_instance *seqiv_aead_alloc(struct rtattr **tb)
+static int seqiv_ablkcipher_create(struct crypto_template *tmpl,
+				   struct rtattr **tb)
 {
 	struct crypto_instance *inst;
+	int err;
 
-	inst = aead_geniv_alloc(&seqiv_tmpl, tb, 0, 0);
+	inst = skcipher_geniv_alloc(tmpl, tb, 0, 0);
 
 	if (IS_ERR(inst))
-		goto out;
+		return PTR_ERR(inst);
 
-	if (inst->alg.cra_aead.ivsize < sizeof(u64)) {
-		aead_geniv_free(inst);
-		inst = ERR_PTR(-EINVAL);
-		goto out;
-	}
+	err = -EINVAL;
+	if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64))
+		goto free_inst;
+
+	inst->alg.cra_init = seqiv_init;
+	inst->alg.cra_exit = skcipher_geniv_exit;
+
+	inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize;
+	inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx);
+
+	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
 
-	inst->alg.cra_aead.givencrypt = seqiv_aead_givencrypt_first;
+	err = crypto_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
 
-	inst->alg.cra_init = seqiv_aead_init;
+out:
+	return err;
+
+free_inst:
+	skcipher_geniv_free(inst);
+	goto out;
+}
+
+static int seqiv_old_aead_create(struct crypto_template *tmpl,
+				 struct aead_instance *aead)
+{
+	struct crypto_instance *inst = aead_crypto_instance(aead);
+	int err = -EINVAL;
+
+	if (inst->alg.cra_aead.ivsize < sizeof(u64))
+		goto free_inst;
+
+	inst->alg.cra_init = seqiv_old_aead_init;
 	inst->alg.cra_exit = aead_geniv_exit;
 
 	inst->alg.cra_ctxsize = inst->alg.cra_aead.ivsize;
+	inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx);
+
+	err = crypto_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
 
 out:
-	return inst;
+	return err;
+
+free_inst:
+	aead_geniv_free(aead);
+	goto out;
 }
 
-static struct crypto_instance *seqiv_alloc(struct rtattr **tb)
+static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct aead_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct aead_alg *alg;
+	int err;
+
+	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+
+	if (IS_ERR(inst))
+		return PTR_ERR(inst);
+
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+
+	if (inst->alg.base.cra_aead.encrypt)
+		return seqiv_old_aead_create(tmpl, inst);
+
+	spawn = aead_instance_ctx(inst);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	if (alg->base.cra_aead.encrypt)
+		goto done;
+
+	err = -EINVAL;
+	if (inst->alg.ivsize != sizeof(u64))
+		goto free_inst;
+
+	inst->alg.encrypt = seqiv_aead_encrypt;
+	inst->alg.decrypt = seqiv_aead_decrypt;
+
+	inst->alg.base.cra_init = seqiv_aead_init;
+	inst->alg.base.cra_exit = seqiv_aead_exit;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx);
+	inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize;
+
+done:
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
+
+out:
+	return err;
+
+free_inst:
+	aead_geniv_free(inst);
+	goto out;
+}
+
+static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
-		return ERR_CAST(algt);
-
-	err = crypto_get_default_rng();
-	if (err)
-		return ERR_PTR(err);
+		return PTR_ERR(algt);
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK)
-		inst = seqiv_ablkcipher_alloc(tb);
+		err = seqiv_ablkcipher_create(tmpl, tb);
 	else
-		inst = seqiv_aead_alloc(tb);
+		err = seqiv_aead_create(tmpl, tb);
 
+	return err;
+}
+
+static int seqniv_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct aead_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct aead_alg *alg;
+	int err;
+
+	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+	err = PTR_ERR(inst);
 	if (IS_ERR(inst))
-		goto put_rng;
+		goto out;
 
-	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
-	inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx);
+	spawn = aead_instance_ctx(inst);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	if (alg->base.cra_aead.encrypt)
+		goto done;
+
+	err = -EINVAL;
+	if (inst->alg.ivsize != sizeof(u64))
+		goto free_inst;
+
+	inst->alg.encrypt = seqniv_aead_encrypt;
+	inst->alg.decrypt = seqniv_aead_decrypt;
+
+	inst->alg.base.cra_init = seqniv_aead_init;
+	inst->alg.base.cra_exit = seqiv_aead_exit;
+
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx);
+	inst->alg.base.cra_ctxsize += inst->alg.ivsize;
+
+done:
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
 
 out:
-	return inst;
+	return err;
 
-put_rng:
-	crypto_put_default_rng();
+free_inst:
+	aead_geniv_free(inst);
 	goto out;
 }
 
@@ -348,24 +752,46 @@ static void seqiv_free(struct crypto_instance *inst)
 	if ((inst->alg.cra_flags ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK)
 		skcipher_geniv_free(inst);
 	else
-		aead_geniv_free(inst);
-	crypto_put_default_rng();
+		aead_geniv_free(aead_instance(inst));
 }
 
 static struct crypto_template seqiv_tmpl = {
 	.name = "seqiv",
-	.alloc = seqiv_alloc,
+	.create = seqiv_create,
+	.free = seqiv_free,
+	.module = THIS_MODULE,
+};
+
+static struct crypto_template seqniv_tmpl = {
+	.name = "seqniv",
+	.create = seqniv_create,
 	.free = seqiv_free,
 	.module = THIS_MODULE,
 };
 
 static int __init seqiv_module_init(void)
 {
-	return crypto_register_template(&seqiv_tmpl);
+	int err;
+
+	err = crypto_register_template(&seqiv_tmpl);
+	if (err)
+		goto out;
+
+	err = crypto_register_template(&seqniv_tmpl);
+	if (err)
+		goto out_undo_niv;
+
+out:
+	return err;
+
+out_undo_niv:
+	crypto_unregister_template(&seqiv_tmpl);
+	goto out;
 }
 
 static void __exit seqiv_module_exit(void)
 {
+	crypto_unregister_template(&seqniv_tmpl);
 	crypto_unregister_template(&seqiv_tmpl);
 }
 
@@ -375,3 +801,4 @@ module_exit(seqiv_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Sequence Number IV Generator");
 MODULE_ALIAS_CRYPTO("seqiv");
+MODULE_ALIAS_CRYPTO("seqniv");

+ 1 - 6
crypto/shash.c

@@ -520,11 +520,6 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static unsigned int crypto_shash_extsize(struct crypto_alg *alg)
-{
-	return alg->cra_ctxsize;
-}
-
 #ifdef CONFIG_NET
 static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
@@ -564,7 +559,7 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
 
 static const struct crypto_type crypto_shash_type = {
 	.ctxsize = crypto_shash_ctxsize,
-	.extsize = crypto_shash_extsize,
+	.extsize = crypto_alg_extsize,
 	.init = crypto_init_shash_ops,
 	.init_tfm = crypto_shash_init_tfm,
 #ifdef CONFIG_PROC_FS
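
The removed callback was a duplicate of the shared helper this hunk
switches to; at this point in the series, crypto_alg_extsize() in
crypto/algapi.c is the same one-liner:

	unsigned int crypto_alg_extsize(struct crypto_alg *alg)
	{
		return alg->cra_ctxsize;
	}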

+ 21 - 15
crypto/tcrypt.c

@@ -22,8 +22,10 @@
  *
  */
 
+#include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
@@ -34,7 +36,6 @@
 #include <linux/timex.h>
 #include <linux/interrupt.h>
 #include "tcrypt.h"
-#include "internal.h"
 
 /*
  * Need slab memory for testing (size in number of pages).
@@ -257,12 +258,12 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
 		rem = buflen % PAGE_SIZE;
 	}
 
-	sg_init_table(sg, np);
+	sg_init_table(sg, np + 1);
 	np--;
 	for (k = 0; k < np; k++)
-		sg_set_buf(&sg[k], xbuf[k], PAGE_SIZE);
+		sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
 
-	sg_set_buf(&sg[k], xbuf[k], rem);
+	sg_set_buf(&sg[k + 1], xbuf[k], rem);
 }
 
 static void test_aead_speed(const char *algo, int enc, unsigned int secs,
@@ -276,7 +277,6 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 	const char *key;
 	struct aead_request *req;
 	struct scatterlist *sg;
-	struct scatterlist *asg;
 	struct scatterlist *sgout;
 	const char *e;
 	void *assoc;
@@ -308,11 +308,10 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 	if (testmgr_alloc_buf(xoutbuf))
 		goto out_nooutbuf;
 
-	sg = kmalloc(sizeof(*sg) * 8 * 3, GFP_KERNEL);
+	sg = kmalloc(sizeof(*sg) * 9 * 2, GFP_KERNEL);
 	if (!sg)
 		goto out_nosg;
-	asg = &sg[8];
-	sgout = &asg[8];
+	sgout = &sg[9];
 
 	tfm = crypto_alloc_aead(algo, 0, 0);
 
@@ -338,7 +337,6 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 		do {
 			assoc = axbuf[0];
 			memset(assoc, 0xff, aad_size);
-			sg_init_one(&asg[0], assoc, aad_size);
 
 			if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) {
 				pr_err("template (%u) too big for tvmem (%lu)\n",
@@ -374,14 +372,17 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 				goto out;
 			}
 
-			sg_init_aead(&sg[0], xbuf,
+			sg_init_aead(sg, xbuf,
 				    *b_size + (enc ? authsize : 0));
 
-			sg_init_aead(&sgout[0], xoutbuf,
+			sg_init_aead(sgout, xoutbuf,
 				    *b_size + (enc ? authsize : 0));
 
+			sg_set_buf(&sg[0], assoc, aad_size);
+			sg_set_buf(&sgout[0], assoc, aad_size);
+
 			aead_request_set_crypt(req, sg, sgout, *b_size, iv);
-			aead_request_set_assoc(req, asg, aad_size);
+			aead_request_set_ad(req, aad_size);
 
 			if (secs)
 				ret = test_aead_jiffies(req, enc, *b_size,
@@ -808,7 +809,7 @@ static int test_ahash_jiffies(struct ahash_request *req, int blen,
 
 	for (start = jiffies, end = start + secs * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		ret = crypto_ahash_init(req);
+		ret = do_one_ahash_op(req, crypto_ahash_init(req));
 		if (ret)
 			return ret;
 		for (pcount = 0; pcount < blen; pcount += plen) {
@@ -877,7 +878,7 @@ static int test_ahash_cycles(struct ahash_request *req, int blen,
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		ret = crypto_ahash_init(req);
+		ret = do_one_ahash_op(req, crypto_ahash_init(req));
 		if (ret)
 			goto out;
 		for (pcount = 0; pcount < blen; pcount += plen) {
@@ -896,7 +897,7 @@ static int test_ahash_cycles(struct ahash_request *req, int blen,
 
 		start = get_cycles();
 
-		ret = crypto_ahash_init(req);
+		ret = do_one_ahash_op(req, crypto_ahash_init(req));
 		if (ret)
 			goto out;
 		for (pcount = 0; pcount < blen; pcount += plen) {
@@ -1761,6 +1762,11 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				NULL, 0, 16, 8, aead_speed_template_20);
 		break;
 
+	case 212:
+		test_aead_speed("rfc4309(ccm(aes))", ENCRYPT, sec,
+				NULL, 0, 16, 8, aead_speed_template_19);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
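
The reshuffling above is the pattern every converted AEAD user now
follows: associated data and text share one scatterlist, AD first, and
aead_request_set_ad() carries only its length. A minimal sketch with
placeholder buffers (assoc, xbuf, aad_size and text_len are illustrative
names, not from the patch):

	struct scatterlist sg[2];

	sg_init_table(sg, 2);
	sg_set_buf(&sg[0], assoc, aad_size);	/* AD comes first */
	sg_set_buf(&sg[1], xbuf, text_len);	/* then plain/cipher text */

	aead_request_set_crypt(req, sg, sg, text_len, iv);
	aead_request_set_ad(req, aad_size);	/* length only, no second list */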

+ 1 - 0
crypto/tcrypt.h

@@ -65,6 +65,7 @@ static u8 speed_template_32_64[] = {32, 64, 0};
 /*
  * AEAD speed tests
  */
+static u8 aead_speed_template_19[] = {19, 0};
 static u8 aead_speed_template_20[] = {20, 0};
 
 /*

+ 275 - 39
crypto/testmgr.c

@@ -20,14 +20,17 @@
  *
  */
 
+#include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <crypto/rng.h>
 #include <crypto/drbg.h>
+#include <crypto/akcipher.h>
 
 #include "internal.h"
 
@@ -114,6 +117,11 @@ struct drbg_test_suite {
 	unsigned int count;
 };
 
+struct akcipher_test_suite {
+	struct akcipher_testvec *vecs;
+	unsigned int count;
+};
+
 struct alg_test_desc {
 	const char *alg;
 	int (*test)(const struct alg_test_desc *desc, const char *driver,
@@ -128,6 +136,7 @@ struct alg_test_desc {
 		struct hash_test_suite hash;
 		struct cprng_test_suite cprng;
 		struct drbg_test_suite drbg;
+		struct akcipher_test_suite akcipher;
 	} suite;
 };
 
@@ -425,7 +434,6 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 	char *key;
 	struct aead_request *req;
 	struct scatterlist *sg;
-	struct scatterlist *asg;
 	struct scatterlist *sgout;
 	const char *e, *d;
 	struct tcrypt_result result;
@@ -452,11 +460,10 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 		goto out_nooutbuf;
 
 	/* avoid "the frame size is larger than 1024 bytes" compiler warning */
-	sg = kmalloc(sizeof(*sg) * 8 * (diff_dst ? 3 : 2), GFP_KERNEL);
+	sg = kmalloc(sizeof(*sg) * 8 * (diff_dst ? 4 : 2), GFP_KERNEL);
 	if (!sg)
 		goto out_nosg;
-	asg = &sg[8];
-	sgout = &asg[8];
+	sgout = &sg[16];
 
 	if (diff_dst)
 		d = "-ddst";
@@ -535,23 +542,27 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 			goto out;
 		}
 
+		k = !!template[i].alen;
+		sg_init_table(sg, k + 1);
+		sg_set_buf(&sg[0], assoc, template[i].alen);
+		sg_set_buf(&sg[k], input,
+			   template[i].ilen + (enc ? authsize : 0));
+		output = input;
+
 		if (diff_dst) {
+			sg_init_table(sgout, k + 1);
+			sg_set_buf(&sgout[0], assoc, template[i].alen);
+
 			output = xoutbuf[0];
 			output += align_offset;
-			sg_init_one(&sg[0], input, template[i].ilen);
-			sg_init_one(&sgout[0], output, template[i].rlen);
-		} else {
-			sg_init_one(&sg[0], input,
-				    template[i].ilen + (enc ? authsize : 0));
-			output = input;
+			sg_set_buf(&sgout[k], output,
+				   template[i].rlen + (enc ? 0 : authsize));
 		}
 
-		sg_init_one(&asg[0], assoc, template[i].alen);
-
 		aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg,
 				       template[i].ilen, iv);
 
-		aead_request_set_assoc(req, asg, template[i].alen);
+		aead_request_set_ad(req, template[i].alen);
 
 		ret = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
 
@@ -631,9 +642,29 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 		authsize = abs(template[i].rlen - template[i].ilen);
 
 		ret = -EINVAL;
-		sg_init_table(sg, template[i].np);
+		sg_init_table(sg, template[i].anp + template[i].np);
 		if (diff_dst)
-			sg_init_table(sgout, template[i].np);
+			sg_init_table(sgout, template[i].anp + template[i].np);
+
+		ret = -EINVAL;
+		for (k = 0, temp = 0; k < template[i].anp; k++) {
+			if (WARN_ON(offset_in_page(IDX[k]) +
+				    template[i].atap[k] > PAGE_SIZE))
+				goto out;
+			sg_set_buf(&sg[k],
+				   memcpy(axbuf[IDX[k] >> PAGE_SHIFT] +
+					  offset_in_page(IDX[k]),
+					  template[i].assoc + temp,
+					  template[i].atap[k]),
+				   template[i].atap[k]);
+			if (diff_dst)
+				sg_set_buf(&sgout[k],
+					   axbuf[IDX[k] >> PAGE_SHIFT] +
+					   offset_in_page(IDX[k]),
+					   template[i].atap[k]);
+			temp += template[i].atap[k];
+		}
+
 		for (k = 0, temp = 0; k < template[i].np; k++) {
 			if (WARN_ON(offset_in_page(IDX[k]) +
 				    template[i].tap[k] > PAGE_SIZE))
@@ -641,7 +672,8 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 
 			q = xbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]);
 			memcpy(q, template[i].input + temp, template[i].tap[k]);
-			sg_set_buf(&sg[k], q, template[i].tap[k]);
+			sg_set_buf(&sg[template[i].anp + k],
+				   q, template[i].tap[k]);
 
 			if (diff_dst) {
 				q = xoutbuf[IDX[k] >> PAGE_SHIFT] +
@@ -649,7 +681,8 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 
 				memset(q, 0, template[i].tap[k]);
 
-				sg_set_buf(&sgout[k], q, template[i].tap[k]);
+				sg_set_buf(&sgout[template[i].anp + k],
+					   q, template[i].tap[k]);
 			}
 
 			n = template[i].tap[k];
@@ -669,39 +702,24 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 		}
 
 		if (enc) {
-			if (WARN_ON(sg[k - 1].offset +
-				    sg[k - 1].length + authsize >
-				    PAGE_SIZE)) {
+			if (WARN_ON(sg[template[i].anp + k - 1].offset +
+				    sg[template[i].anp + k - 1].length +
+				    authsize > PAGE_SIZE)) {
 				ret = -EINVAL;
 				goto out;
 			}
 
 			if (diff_dst)
-				sgout[k - 1].length += authsize;
-			else
-				sg[k - 1].length += authsize;
-		}
-
-		sg_init_table(asg, template[i].anp);
-		ret = -EINVAL;
-		for (k = 0, temp = 0; k < template[i].anp; k++) {
-			if (WARN_ON(offset_in_page(IDX[k]) +
-				    template[i].atap[k] > PAGE_SIZE))
-				goto out;
-			sg_set_buf(&asg[k],
-				   memcpy(axbuf[IDX[k] >> PAGE_SHIFT] +
-					  offset_in_page(IDX[k]),
-					  template[i].assoc + temp,
-					  template[i].atap[k]),
-				   template[i].atap[k]);
-			temp += template[i].atap[k];
+				sgout[template[i].anp + k - 1].length +=
+					authsize;
+			sg[template[i].anp + k - 1].length += authsize;
 		}
 
 		aead_request_set_crypt(req, sg, (diff_dst) ? sgout : sg,
 				       template[i].ilen,
 				       iv);
 
-		aead_request_set_assoc(req, asg, template[i].alen);
+		aead_request_set_ad(req, template[i].alen);
 
 		ret = enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
 
@@ -1814,6 +1832,147 @@ static int alg_test_drbg(const struct alg_test_desc *desc, const char *driver,
 
 }
 
+static int do_test_rsa(struct crypto_akcipher *tfm,
+		       struct akcipher_testvec *vecs)
+{
+	struct akcipher_request *req;
+	void *outbuf_enc = NULL;
+	void *outbuf_dec = NULL;
+	struct tcrypt_result result;
+	unsigned int out_len_max, out_len = 0;
+	int err = -ENOMEM;
+
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		return err;
+
+	init_completion(&result.completion);
+	err = crypto_akcipher_setkey(tfm, vecs->key, vecs->key_len);
+	if (err)
+		goto free_req;
+
+	akcipher_request_set_crypt(req, vecs->m, outbuf_enc, vecs->m_size,
+				   out_len);
+	/* expect this to fail, and update the required buf len */
+	crypto_akcipher_encrypt(req);
+	out_len = req->dst_len;
+	if (!out_len) {
+		err = -EINVAL;
+		goto free_req;
+	}
+
+	out_len_max = out_len;
+	err = -ENOMEM;
+	outbuf_enc = kzalloc(out_len_max, GFP_KERNEL);
+	if (!outbuf_enc)
+		goto free_req;
+
+	akcipher_request_set_crypt(req, vecs->m, outbuf_enc, vecs->m_size,
+				   out_len);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &result);
+
+	/* Run RSA encrypt - c = m^e mod n */
+	err = wait_async_op(&result, crypto_akcipher_encrypt(req));
+	if (err) {
+		pr_err("alg: rsa: encrypt test failed. err %d\n", err);
+		goto free_all;
+	}
+	if (out_len != vecs->c_size) {
+		pr_err("alg: rsa: encrypt test failed. Invalid output len\n");
+		err = -EINVAL;
+		goto free_all;
+	}
+	/* verify that encrypted message is equal to expected */
+	if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) {
+		pr_err("alg: rsa: encrypt test failed. Invalid output\n");
+		err = -EINVAL;
+		goto free_all;
+	}
+	/* Don't invoke decrypt for vectors with public key */
+	if (vecs->public_key_vec) {
+		err = 0;
+		goto free_all;
+	}
+	outbuf_dec = kzalloc(out_len_max, GFP_KERNEL);
+	if (!outbuf_dec) {
+		err = -ENOMEM;
+		goto free_all;
+	}
+	init_completion(&result.completion);
+	akcipher_request_set_crypt(req, outbuf_enc, outbuf_dec, vecs->c_size,
+				   out_len);
+
+	/* Run RSA decrypt - m = c^d mod n */
+	err = wait_async_op(&result, crypto_akcipher_decrypt(req));
+	if (err) {
+		pr_err("alg: rsa: decrypt test failed. err %d\n", err);
+		goto free_all;
+	}
+	out_len = req->dst_len;
+	if (out_len != vecs->m_size) {
+		pr_err("alg: rsa: decrypt test failed. Invalid output len\n");
+		err = -EINVAL;
+		goto free_all;
+	}
+	/* verify that decrypted message is equal to the original msg */
+	if (memcmp(vecs->m, outbuf_dec, vecs->m_size)) {
+		pr_err("alg: rsa: decrypt test failed. Invalid output\n");
+		err = -EINVAL;
+	}
+free_all:
+	kfree(outbuf_dec);
+	kfree(outbuf_enc);
+free_req:
+	akcipher_request_free(req);
+	return err;
+}
+
+static int test_rsa(struct crypto_akcipher *tfm, struct akcipher_testvec *vecs,
+		    unsigned int tcount)
+{
+	int ret, i;
+
+	for (i = 0; i < tcount; i++) {
+		ret = do_test_rsa(tfm, vecs++);
+		if (ret) {
+			pr_err("alg: rsa: test failed on vector %d, err=%d\n",
+			       i + 1, ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static int test_akcipher(struct crypto_akcipher *tfm, const char *alg,
+			 struct akcipher_testvec *vecs, unsigned int tcount)
+{
+	if (strncmp(alg, "rsa", 3) == 0)
+		return test_rsa(tfm, vecs, tcount);
+
+	return 0;
+}
+
+static int alg_test_akcipher(const struct alg_test_desc *desc,
+			     const char *driver, u32 type, u32 mask)
+{
+	struct crypto_akcipher *tfm;
+	int err = 0;
+
+	tfm = crypto_alloc_akcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	if (IS_ERR(tfm)) {
+		pr_err("alg: akcipher: Failed to load tfm for %s: %ld\n",
+		       driver, PTR_ERR(tfm));
+		return PTR_ERR(tfm);
+	}
+	if (desc->suite.akcipher.vecs)
+		err = test_akcipher(tfm, desc->alg, desc->suite.akcipher.vecs,
+				    desc->suite.akcipher.count);
+
+	crypto_free_akcipher(tfm);
+	return err;
+}
+
 static int alg_test_null(const struct alg_test_desc *desc,
 			     const char *driver, u32 type, u32 mask)
 {
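
Stripped of the test plumbing, the akcipher calling convention exercised
by do_test_rsa() is short. A hedged sketch reusing this file's
tcrypt_complete/wait_async_op helpers, with error handling elided and
key/msg/out plus the length variables as placeholders:

	struct crypto_akcipher *tfm = crypto_alloc_akcipher("rsa", 0, 0);
	struct akcipher_request *req = akcipher_request_alloc(tfm, GFP_KERNEL);
	struct tcrypt_result result;
	int err;

	init_completion(&result.completion);
	crypto_akcipher_setkey(tfm, key, key_len);
	akcipher_request_set_crypt(req, msg, out, msg_len, out_len);
	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      tcrypt_complete, &result);
	err = wait_async_op(&result, crypto_akcipher_encrypt(req));
	/* on success, req->dst_len holds the actual output length */

	akcipher_request_free(req);
	crypto_free_akcipher(tfm);
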
@@ -2296,6 +2455,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "chacha20",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = chacha20_enc_tv_template,
+					.count = CHACHA20_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = chacha20_enc_tv_template,
+					.count = CHACHA20_ENC_TEST_VECTORS
+				},
+			}
+		}
 	}, {
 		.alg = "cmac(aes)",
 		.test = alg_test_hash,
@@ -2317,6 +2491,15 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 		.alg = "compress_null",
 		.test = alg_test_null,
+	}, {
+		.alg = "crc32",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = crc32_tv_template,
+				.count = CRC32_TEST_VECTORS
+			}
+		}
 	}, {
 		.alg = "crc32c",
 		.test = alg_test_crc32c,
@@ -3094,6 +3277,10 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = HMAC_SHA512_TEST_VECTORS
 			}
 		}
+	}, {
+		.alg = "jitterentropy_rng",
+		.fips_allowed = 1,
+		.test = alg_test_null,
 	}, {
 		.alg = "lrw(aes)",
 		.test = alg_test_skcipher,
@@ -3275,6 +3462,15 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "poly1305",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = poly1305_tv_template,
+				.count = POLY1305_TEST_VECTORS
+			}
+		}
 	}, {
 		.alg = "rfc3686(ctr(aes))",
 		.test = alg_test_skcipher,
@@ -3338,6 +3534,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 				},
 			}
 		}
+	}, {
+		.alg = "rfc7539(chacha20,poly1305)",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = rfc7539_enc_tv_template,
+					.count = RFC7539_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = rfc7539_dec_tv_template,
+					.count = RFC7539_DEC_TEST_VECTORS
+				},
+			}
+		}
+	}, {
+		.alg = "rfc7539esp(chacha20,poly1305)",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = {
+					.vecs = rfc7539esp_enc_tv_template,
+					.count = RFC7539ESP_ENC_TEST_VECTORS
+				},
+				.dec = {
+					.vecs = rfc7539esp_dec_tv_template,
+					.count = RFC7539ESP_DEC_TEST_VECTORS
+				},
+			}
+		}
 	}, {
 		.alg = "rmd128",
 		.test = alg_test_hash,
@@ -3374,6 +3600,16 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = RMD320_TEST_VECTORS
 			}
 		}
+	}, {
+		.alg = "rsa",
+		.test = alg_test_akcipher,
+		.fips_allowed = 1,
+		.suite = {
+			.akcipher = {
+				.vecs = rsa_tv_template,
+				.count = RSA_TEST_VECTORS
+			}
+		}
 	}, {
 		.alg = "salsa20",
 		.test = alg_test_skcipher,

+ 2429 - 14
crypto/testmgr.h

@@ -46,6 +46,24 @@ struct hash_testvec {
 	unsigned char ksize;
 };
 
+/*
+ * cipher_testvec:	structure to describe a cipher test
+ * @key:	A pointer to a key used by the test
+ * @klen:	The length of @key
+ * @iv:		A pointer to the IV used by the test
+ * @input:	A pointer to data used as input
+ * @ilen:	The length of data in @input
+ * @result:	A pointer to what the test needs to produce
+ * @rlen:	The length of data in @result
+ * @fail:	If set to one, the test needs to fail
+ * @wk:		Does the test need CRYPTO_TFM_REQ_WEAK_KEY
+ *		(e.g. the test needs to fail due to a weak key)
+ * @np:		Number of SG entries to distribute the data across
+ *		(from 1 to MAX_TAP)
+ * @tap:	How to distribute the data across the @np SGs
+ * @also_non_np:	If set to 1, the test is also run without
+ *			splitting the data across @np SGs
+ */
+
 struct cipher_testvec {
 	char *key;
 	char *iv;
@@ -54,7 +72,7 @@ struct cipher_testvec {
 	unsigned short tap[MAX_TAP];
 	int np;
 	unsigned char also_non_np;
-	unsigned char fail;
+	bool fail;
 	unsigned char wk; /* weak key flag */
 	unsigned char klen;
 	unsigned short ilen;
@@ -71,7 +89,7 @@ struct aead_testvec {
 	unsigned char atap[MAX_TAP];
 	int np;
 	int anp;
-	unsigned char fail;
+	bool fail;
 	unsigned char novrfy;	/* ccm dec verification failure expected */
 	unsigned char wk; /* weak key flag */
 	unsigned char klen;
@@ -107,8 +125,195 @@ struct drbg_testvec {
 	size_t expectedlen;
 };
 
+struct akcipher_testvec {
+	unsigned char *key;
+	unsigned char *m;
+	unsigned char *c;
+	unsigned int key_len;
+	unsigned int m_size;
+	unsigned int c_size;
+	bool public_key_vec;
+};
+
 static char zeroed_string[48];
 
+/*
+ * RSA test vectors. Borrowed from OpenSSL.
+ */
+#ifdef CONFIG_CRYPTO_FIPS
+#define RSA_TEST_VECTORS	2
+#else
+#define RSA_TEST_VECTORS	4
+#endif
+static struct akcipher_testvec rsa_tv_template[] = {
+	{
+#ifndef CONFIG_CRYPTO_FIPS
+	.key =
+	"\x30\x81\x88" /* sequence of 136 bytes */
+	"\x02\x41" /* modulus - integer of 65 bytes */
+	"\x00\xAA\x36\xAB\xCE\x88\xAC\xFD\xFF\x55\x52\x3C\x7F\xC4\x52\x3F"
+	"\x90\xEF\xA0\x0D\xF3\x77\x4A\x25\x9F\x2E\x62\xB4\xC5\xD9\x9C\xB5"
+	"\xAD\xB3\x00\xA0\x28\x5E\x53\x01\x93\x0E\x0C\x70\xFB\x68\x76\x93"
+	"\x9C\xE6\x16\xCE\x62\x4A\x11\xE0\x08\x6D\x34\x1E\xBC\xAC\xA0\xA1"
+	"\xF5"
+	"\x02\x01\x11" /* public key - integer of 1 byte */
+	"\x02\x40" /* private key - integer of 64 bytes */
+	"\x0A\x03\x37\x48\x62\x64\x87\x69\x5F\x5F\x30\xBC\x38\xB9\x8B\x44"
+	"\xC2\xCD\x2D\xFF\x43\x40\x98\xCD\x20\xD8\xA1\x38\xD0\x90\xBF\x64"
+	"\x79\x7C\x3F\xA7\xA2\xCD\xCB\x3C\xD1\xE0\xBD\xBA\x26\x54\xB4\xF9"
+	"\xDF\x8E\x8A\xE5\x9D\x73\x3D\x9F\x33\xB3\x01\x62\x4A\xFD\x1D\x51",
+	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
+	.c =
+	"\x63\x1c\xcd\x7b\xe1\x7e\xe4\xde\xc9\xa8\x89\xa1\x74\xcb\x3c\x63"
+	"\x7d\x24\xec\x83\xc3\x15\xe4\x7f\x73\x05\x34\xd1\xec\x22\xbb\x8a"
+	"\x5e\x32\x39\x6d\xc1\x1d\x7d\x50\x3b\x9f\x7a\xad\xf0\x2e\x25\x53"
+	"\x9f\x6e\xbd\x4c\x55\x84\x0c\x9b\xcf\x1a\x4b\x51\x1e\x9e\x0c\x06",
+	.key_len = 139,
+	.m_size = 8,
+	.c_size = 64,
+	}, {
+	.key =
+	"\x30\x82\x01\x0B" /* sequence of 267 bytes */
+	"\x02\x81\x81" /* modulus - integer of 129 bytes */
+	"\x00\xBB\xF8\x2F\x09\x06\x82\xCE\x9C\x23\x38\xAC\x2B\x9D\xA8\x71"
+	"\xF7\x36\x8D\x07\xEE\xD4\x10\x43\xA4\x40\xD6\xB6\xF0\x74\x54\xF5"
+	"\x1F\xB8\xDF\xBA\xAF\x03\x5C\x02\xAB\x61\xEA\x48\xCE\xEB\x6F\xCD"
+	"\x48\x76\xED\x52\x0D\x60\xE1\xEC\x46\x19\x71\x9D\x8A\x5B\x8B\x80"
+	"\x7F\xAF\xB8\xE0\xA3\xDF\xC7\x37\x72\x3E\xE6\xB4\xB7\xD9\x3A\x25"
+	"\x84\xEE\x6A\x64\x9D\x06\x09\x53\x74\x88\x34\xB2\x45\x45\x98\x39"
+	"\x4E\xE0\xAA\xB1\x2D\x7B\x61\xA5\x1F\x52\x7A\x9A\x41\xF6\xC1\x68"
+	"\x7F\xE2\x53\x72\x98\xCA\x2A\x8F\x59\x46\xF8\xE5\xFD\x09\x1D\xBD"
+	"\xCB"
+	"\x02\x01\x11" /* public key - integer of 1 byte */
+	"\x02\x81\x81"  /* private key - integer of 129 bytes */
+	"\x00\xA5\xDA\xFC\x53\x41\xFA\xF2\x89\xC4\xB9\x88\xDB\x30\xC1\xCD"
+	"\xF8\x3F\x31\x25\x1E\x06\x68\xB4\x27\x84\x81\x38\x01\x57\x96\x41"
+	"\xB2\x94\x10\xB3\xC7\x99\x8D\x6B\xC4\x65\x74\x5E\x5C\x39\x26\x69"
+	"\xD6\x87\x0D\xA2\xC0\x82\xA9\x39\xE3\x7F\xDC\xB8\x2E\xC9\x3E\xDA"
+	"\xC9\x7F\xF3\xAD\x59\x50\xAC\xCF\xBC\x11\x1C\x76\xF1\xA9\x52\x94"
+	"\x44\xE5\x6A\xAF\x68\xC5\x6C\x09\x2C\xD3\x8D\xC3\xBE\xF5\xD2\x0A"
+	"\x93\x99\x26\xED\x4F\x74\xA1\x3E\xDD\xFB\xE1\xA1\xCE\xCC\x48\x94"
+	"\xAF\x94\x28\xC2\xB7\xB8\x88\x3F\xE4\x46\x3A\x4B\xC8\x5B\x1C\xB3"
+	"\xC1",
+	.key_len = 271,
+	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
+	.c =
+	"\x74\x1b\x55\xac\x47\xb5\x08\x0a\x6e\x2b\x2d\xf7\x94\xb8\x8a\x95"
+	"\xed\xa3\x6b\xc9\x29\xee\xb2\x2c\x80\xc3\x39\x3b\x8c\x62\x45\x72"
+	"\xc2\x7f\x74\x81\x91\x68\x44\x48\x5a\xdc\xa0\x7e\xa7\x0b\x05\x7f"
+	"\x0e\xa0\x6c\xe5\x8f\x19\x4d\xce\x98\x47\x5f\xbd\x5f\xfe\xe5\x34"
+	"\x59\x89\xaf\xf0\xba\x44\xd7\xf1\x1a\x50\x72\xef\x5e\x4a\xb6\xb7"
+	"\x54\x34\xd1\xc4\x83\x09\xdf\x0f\x91\x5f\x7d\x91\x70\x2f\xd4\x13"
+	"\xcc\x5e\xa4\x6c\xc3\x4d\x28\xef\xda\xaf\xec\x14\x92\xfc\xa3\x75"
+	"\x13\xb4\xc1\xa1\x11\xfc\x40\x2f\x4c\x9d\xdf\x16\x76\x11\x20\x6b",
+	.m_size = 8,
+	.c_size = 128,
+	}, {
+#endif
+	.key =
+	"\x30\x82\x02\x0D" /* sequence of 525 bytes */
+	"\x02\x82\x01\x00" /* modulus - integer of 256 bytes */
+	"\xDB\x10\x1A\xC2\xA3\xF1\xDC\xFF\x13\x6B\xED\x44\xDF\xF0\x02\x6D"
+	"\x13\xC7\x88\xDA\x70\x6B\x54\xF1\xE8\x27\xDC\xC3\x0F\x99\x6A\xFA"
+	"\xC6\x67\xFF\x1D\x1E\x3C\x1D\xC1\xB5\x5F\x6C\xC0\xB2\x07\x3A\x6D"
+	"\x41\xE4\x25\x99\xAC\xFC\xD2\x0F\x02\xD3\xD1\x54\x06\x1A\x51\x77"
+	"\xBD\xB6\xBF\xEA\xA7\x5C\x06\xA9\x5D\x69\x84\x45\xD7\xF5\x05\xBA"
+	"\x47\xF0\x1B\xD7\x2B\x24\xEC\xCB\x9B\x1B\x10\x8D\x81\xA0\xBE\xB1"
+	"\x8C\x33\xE4\x36\xB8\x43\xEB\x19\x2A\x81\x8D\xDE\x81\x0A\x99\x48"
+	"\xB6\xF6\xBC\xCD\x49\x34\x3A\x8F\x26\x94\xE3\x28\x82\x1A\x7C\x8F"
+	"\x59\x9F\x45\xE8\x5D\x1A\x45\x76\x04\x56\x05\xA1\xD0\x1B\x8C\x77"
+	"\x6D\xAF\x53\xFA\x71\xE2\x67\xE0\x9A\xFE\x03\xA9\x85\xD2\xC9\xAA"
+	"\xBA\x2A\xBC\xF4\xA0\x08\xF5\x13\x98\x13\x5D\xF0\xD9\x33\x34\x2A"
+	"\x61\xC3\x89\x55\xF0\xAE\x1A\x9C\x22\xEE\x19\x05\x8D\x32\xFE\xEC"
+	"\x9C\x84\xBA\xB7\xF9\x6C\x3A\x4F\x07\xFC\x45\xEB\x12\xE5\x7B\xFD"
+	"\x55\xE6\x29\x69\xD1\xC2\xE8\xB9\x78\x59\xF6\x79\x10\xC6\x4E\xEB"
+	"\x6A\x5E\xB9\x9A\xC7\xC4\x5B\x63\xDA\xA3\x3F\x5E\x92\x7A\x81\x5E"
+	"\xD6\xB0\xE2\x62\x8F\x74\x26\xC2\x0C\xD3\x9A\x17\x47\xE6\x8E\xAB"
+	"\x02\x03\x01\x00\x01" /* public key - integer of 3 bytes */
+	"\x02\x82\x01\x00" /* private key - integer of 256 bytes */
+	"\x52\x41\xF4\xDA\x7B\xB7\x59\x55\xCA\xD4\x2F\x0F\x3A\xCB\xA4\x0D"
+	"\x93\x6C\xCC\x9D\xC1\xB2\xFB\xFD\xAE\x40\x31\xAC\x69\x52\x21\x92"
+	"\xB3\x27\xDF\xEA\xEE\x2C\x82\xBB\xF7\x40\x32\xD5\x14\xC4\x94\x12"
+	"\xEC\xB8\x1F\xCA\x59\xE3\xC1\x78\xF3\x85\xD8\x47\xA5\xD7\x02\x1A"
+	"\x65\x79\x97\x0D\x24\xF4\xF0\x67\x6E\x75\x2D\xBF\x10\x3D\xA8\x7D"
+	"\xEF\x7F\x60\xE4\xE6\x05\x82\x89\x5D\xDF\xC6\xD2\x6C\x07\x91\x33"
+	"\x98\x42\xF0\x02\x00\x25\x38\xC5\x85\x69\x8A\x7D\x2F\x95\x6C\x43"
+	"\x9A\xB8\x81\xE2\xD0\x07\x35\xAA\x05\x41\xC9\x1E\xAF\xE4\x04\x3B"
+	"\x19\xB8\x73\xA2\xAC\x4B\x1E\x66\x48\xD8\x72\x1F\xAC\xF6\xCB\xBC"
+	"\x90\x09\xCA\xEC\x0C\xDC\xF9\x2C\xD7\xEB\xAE\xA3\xA4\x47\xD7\x33"
+	"\x2F\x8A\xCA\xBC\x5E\xF0\x77\xE4\x97\x98\x97\xC7\x10\x91\x7D\x2A"
+	"\xA6\xFF\x46\x83\x97\xDE\xE9\xE2\x17\x03\x06\x14\xE2\xD7\xB1\x1D"
+	"\x77\xAF\x51\x27\x5B\x5E\x69\xB8\x81\xE6\x11\xC5\x43\x23\x81\x04"
+	"\x62\xFF\xE9\x46\xB8\xD8\x44\xDB\xA5\xCC\x31\x54\x34\xCE\x3E\x82"
+	"\xD6\xBF\x7A\x0B\x64\x21\x6D\x88\x7E\x5B\x45\x12\x1E\x63\x8D\x49"
+	"\xA7\x1D\xD9\x1E\x06\xCD\xE8\xBA\x2C\x8C\x69\x32\xEA\xBE\x60\x71",
+	.key_len = 529,
+	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
+	.c =
+	"\xb2\x97\x76\xb4\xae\x3e\x38\x3c\x7e\x64\x1f\xcc\xa2\x7f\xf6\xbe"
+	"\xcf\x49\xbc\x48\xd3\x6c\x8f\x0a\x0e\xc1\x73\xbd\x7b\x55\x79\x36"
+	"\x0e\xa1\x87\x88\xb9\x2c\x90\xa6\x53\x5e\xe9\xef\xc4\xe2\x4d\xdd"
+	"\xf7\xa6\x69\x82\x3f\x56\xa4\x7b\xfb\x62\xe0\xae\xb8\xd3\x04\xb3"
+	"\xac\x5a\x15\x2a\xe3\x19\x9b\x03\x9a\x0b\x41\xda\x64\xec\x0a\x69"
+	"\xfc\xf2\x10\x92\xf3\xc1\xbf\x84\x7f\xfd\x2c\xae\xc8\xb5\xf6\x41"
+	"\x70\xc5\x47\x03\x8a\xf8\xff\x6f\x3f\xd2\x6f\x09\xb4\x22\xf3\x30"
+	"\xbe\xa9\x85\xcb\x9c\x8d\xf9\x8f\xeb\x32\x91\xa2\x25\x84\x8f\xf5"
+	"\xdc\xc7\x06\x9c\x2d\xe5\x11\x2c\x09\x09\x87\x09\xa9\xf6\x33\x73"
+	"\x90\xf1\x60\xf2\x65\xdd\x30\xa5\x66\xce\x62\x7b\xd0\xf8\x2d\x3d"
+	"\x19\x82\x77\xe3\x0a\x5f\x75\x2f\x8e\xb1\xe5\xe8\x91\x35\x1b\x3b"
+	"\x33\xb7\x66\x92\xd1\xf2\x8e\x6f\xe5\x75\x0c\xad\x36\xfb\x4e\xd0"
+	"\x66\x61\xbd\x49\xfe\xf4\x1a\xa2\x2b\x49\xfe\x03\x4c\x74\x47\x8d"
+	"\x9a\x66\xb2\x49\x46\x4d\x77\xea\x33\x4d\x6b\x3c\xb4\x49\x4a\xc6"
+	"\x7d\x3d\xb5\xb9\x56\x41\x15\x67\x0f\x94\x3c\x93\x65\x27\xe0\x21"
+	"\x5d\x59\xc3\x62\xd5\xa6\xda\x38\x26\x22\x5e\x34\x1c\x94\xaf\x98",
+	.m_size = 8,
+	.c_size = 256,
+	}, {
+	.key =
+	"\x30\x82\x01\x09" /* sequence of 265 bytes */
+	"\x02\x82\x01\x00" /* modulus - integer of 256 bytes */
+	"\xDB\x10\x1A\xC2\xA3\xF1\xDC\xFF\x13\x6B\xED\x44\xDF\xF0\x02\x6D"
+	"\x13\xC7\x88\xDA\x70\x6B\x54\xF1\xE8\x27\xDC\xC3\x0F\x99\x6A\xFA"
+	"\xC6\x67\xFF\x1D\x1E\x3C\x1D\xC1\xB5\x5F\x6C\xC0\xB2\x07\x3A\x6D"
+	"\x41\xE4\x25\x99\xAC\xFC\xD2\x0F\x02\xD3\xD1\x54\x06\x1A\x51\x77"
+	"\xBD\xB6\xBF\xEA\xA7\x5C\x06\xA9\x5D\x69\x84\x45\xD7\xF5\x05\xBA"
+	"\x47\xF0\x1B\xD7\x2B\x24\xEC\xCB\x9B\x1B\x10\x8D\x81\xA0\xBE\xB1"
+	"\x8C\x33\xE4\x36\xB8\x43\xEB\x19\x2A\x81\x8D\xDE\x81\x0A\x99\x48"
+	"\xB6\xF6\xBC\xCD\x49\x34\x3A\x8F\x26\x94\xE3\x28\x82\x1A\x7C\x8F"
+	"\x59\x9F\x45\xE8\x5D\x1A\x45\x76\x04\x56\x05\xA1\xD0\x1B\x8C\x77"
+	"\x6D\xAF\x53\xFA\x71\xE2\x67\xE0\x9A\xFE\x03\xA9\x85\xD2\xC9\xAA"
+	"\xBA\x2A\xBC\xF4\xA0\x08\xF5\x13\x98\x13\x5D\xF0\xD9\x33\x34\x2A"
+	"\x61\xC3\x89\x55\xF0\xAE\x1A\x9C\x22\xEE\x19\x05\x8D\x32\xFE\xEC"
+	"\x9C\x84\xBA\xB7\xF9\x6C\x3A\x4F\x07\xFC\x45\xEB\x12\xE5\x7B\xFD"
+	"\x55\xE6\x29\x69\xD1\xC2\xE8\xB9\x78\x59\xF6\x79\x10\xC6\x4E\xEB"
+	"\x6A\x5E\xB9\x9A\xC7\xC4\x5B\x63\xDA\xA3\x3F\x5E\x92\x7A\x81\x5E"
+	"\xD6\xB0\xE2\x62\x8F\x74\x26\xC2\x0C\xD3\x9A\x17\x47\xE6\x8E\xAB"
+	"\x02\x03\x01\x00\x01", /* public key - integer of 3 bytes */
+	.key_len = 269,
+	.m = "\x54\x85\x9b\x34\x2c\x49\xea\x2a",
+	.c =
+	"\xb2\x97\x76\xb4\xae\x3e\x38\x3c\x7e\x64\x1f\xcc\xa2\x7f\xf6\xbe"
+	"\xcf\x49\xbc\x48\xd3\x6c\x8f\x0a\x0e\xc1\x73\xbd\x7b\x55\x79\x36"
+	"\x0e\xa1\x87\x88\xb9\x2c\x90\xa6\x53\x5e\xe9\xef\xc4\xe2\x4d\xdd"
+	"\xf7\xa6\x69\x82\x3f\x56\xa4\x7b\xfb\x62\xe0\xae\xb8\xd3\x04\xb3"
+	"\xac\x5a\x15\x2a\xe3\x19\x9b\x03\x9a\x0b\x41\xda\x64\xec\x0a\x69"
+	"\xfc\xf2\x10\x92\xf3\xc1\xbf\x84\x7f\xfd\x2c\xae\xc8\xb5\xf6\x41"
+	"\x70\xc5\x47\x03\x8a\xf8\xff\x6f\x3f\xd2\x6f\x09\xb4\x22\xf3\x30"
+	"\xbe\xa9\x85\xcb\x9c\x8d\xf9\x8f\xeb\x32\x91\xa2\x25\x84\x8f\xf5"
+	"\xdc\xc7\x06\x9c\x2d\xe5\x11\x2c\x09\x09\x87\x09\xa9\xf6\x33\x73"
+	"\x90\xf1\x60\xf2\x65\xdd\x30\xa5\x66\xce\x62\x7b\xd0\xf8\x2d\x3d"
+	"\x19\x82\x77\xe3\x0a\x5f\x75\x2f\x8e\xb1\xe5\xe8\x91\x35\x1b\x3b"
+	"\x33\xb7\x66\x92\xd1\xf2\x8e\x6f\xe5\x75\x0c\xad\x36\xfb\x4e\xd0"
+	"\x66\x61\xbd\x49\xfe\xf4\x1a\xa2\x2b\x49\xfe\x03\x4c\x74\x47\x8d"
+	"\x9a\x66\xb2\x49\x46\x4d\x77\xea\x33\x4d\x6b\x3c\xb4\x49\x4a\xc6"
+	"\x7d\x3d\xb5\xb9\x56\x41\x15\x67\x0f\x94\x3c\x93\x65\x27\xe0\x21"
+	"\x5d\x59\xc3\x62\xd5\xa6\xda\x38\x26\x22\x5e\x34\x1c\x94\xaf\x98",
+	.m_size = 8,
+	.c_size = 256,
+	.public_key_vec = true,
+	}
+};
+
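
For orientation: each .key blob above is the BER encoding the new rsa
module parses, a SEQUENCE of three INTEGERs n, e and d, with d absent
from the public-key-only vector; .key_len is simply the total length of
that encoding, e.g. 139 = 3 (sequence header) + 67 (n) + 3 (e) + 66 (d)
for the first vector.
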
 /*
  * MD4 test vectors from RFC1320
  */
@@ -1822,7 +2027,7 @@ static struct hash_testvec tgr128_tv_template[] = {
 	},
 };
 
-#define GHASH_TEST_VECTORS 5
+#define GHASH_TEST_VECTORS 6
 
 static struct hash_testvec ghash_tv_template[] =
 {
@@ -1875,6 +2080,63 @@ static struct hash_testvec ghash_tv_template[] =
 		.psize	= 20,
 		.digest	= "\xf8\x94\x87\x2a\x4b\x63\x99\x28"
 			  "\x23\xf7\x93\xf7\x19\xf5\x96\xd9",
+	}, {
+		.key	= "\x0a\x1b\x2c\x3d\x4e\x5f\x64\x71"
+			"\x82\x93\xa4\xb5\xc6\xd7\xe8\xf9",
+		.ksize	= 16,
+		.plaintext = "\x56\x6f\x72\x20\x6c\x61\x75\x74"
+			"\x65\x72\x20\x4c\x61\x75\x73\x63"
+			"\x68\x65\x6e\x20\x75\x6e\x64\x20"
+			"\x53\x74\x61\x75\x6e\x65\x6e\x20"
+			"\x73\x65\x69\x20\x73\x74\x69\x6c"
+			"\x6c\x2c\x0a\x64\x75\x20\x6d\x65"
+			"\x69\x6e\x20\x74\x69\x65\x66\x74"
+			"\x69\x65\x66\x65\x73\x20\x4c\x65"
+			"\x62\x65\x6e\x3b\x0a\x64\x61\x73"
+			"\x73\x20\x64\x75\x20\x77\x65\x69"
+			"\xc3\x9f\x74\x20\x77\x61\x73\x20"
+			"\x64\x65\x72\x20\x57\x69\x6e\x64"
+			"\x20\x64\x69\x72\x20\x77\x69\x6c"
+			"\x6c\x2c\x0a\x65\x68\x20\x6e\x6f"
+			"\x63\x68\x20\x64\x69\x65\x20\x42"
+			"\x69\x72\x6b\x65\x6e\x20\x62\x65"
+			"\x62\x65\x6e\x2e\x0a\x0a\x55\x6e"
+			"\x64\x20\x77\x65\x6e\x6e\x20\x64"
+			"\x69\x72\x20\x65\x69\x6e\x6d\x61"
+			"\x6c\x20\x64\x61\x73\x20\x53\x63"
+			"\x68\x77\x65\x69\x67\x65\x6e\x20"
+			"\x73\x70\x72\x61\x63\x68\x2c\x0a"
+			"\x6c\x61\x73\x73\x20\x64\x65\x69"
+			"\x6e\x65\x20\x53\x69\x6e\x6e\x65"
+			"\x20\x62\x65\x73\x69\x65\x67\x65"
+			"\x6e\x2e\x0a\x4a\x65\x64\x65\x6d"
+			"\x20\x48\x61\x75\x63\x68\x65\x20"
+			"\x67\x69\x62\x74\x20\x64\x69\x63"
+			"\x68\x2c\x20\x67\x69\x62\x20\x6e"
+			"\x61\x63\x68\x2c\x0a\x65\x72\x20"
+			"\x77\x69\x72\x64\x20\x64\x69\x63"
+			"\x68\x20\x6c\x69\x65\x62\x65\x6e"
+			"\x20\x75\x6e\x64\x20\x77\x69\x65"
+			"\x67\x65\x6e\x2e\x0a\x0a\x55\x6e"
+			"\x64\x20\x64\x61\x6e\x6e\x20\x6d"
+			"\x65\x69\x6e\x65\x20\x53\x65\x65"
+			"\x6c\x65\x20\x73\x65\x69\x74\x20"
+			"\x77\x65\x69\x74\x2c\x20\x73\x65"
+			"\x69\x20\x77\x65\x69\x74\x2c\x0a"
+			"\x64\x61\x73\x73\x20\x64\x69\x72"
+			"\x20\x64\x61\x73\x20\x4c\x65\x62"
+			"\x65\x6e\x20\x67\x65\x6c\x69\x6e"
+			"\x67\x65\x2c\x0a\x62\x72\x65\x69"
+			"\x74\x65\x20\x64\x69\x63\x68\x20"
+			"\x77\x69\x65\x20\x65\x69\x6e\x20"
+			"\x46\x65\x69\x65\x72\x6b\x6c\x65"
+			"\x69\x64\x0a\xc3\xbc\x62\x65\x72"
+			"\x20\x64\x69\x65\x20\x73\x69\x6e"
+			"\x6e\x65\x6e\x64\x65\x6e\x20\x44"
+			"\x69\x6e\x67\x65\x2e\x2e\x2e\x0a",
+		.psize	= 400,
+		.digest = "\xad\xb1\xc1\xe9\x56\x70\x31\x1d"
+			"\xbb\x5b\xdf\x5e\x70\x72\x1a\x57",
 	},
 };
 
@@ -2968,6 +3230,254 @@ static struct hash_testvec hmac_sha512_tv_template[] = {
 	},
 };
 
+/*
+ * Poly1305 test vectors from RFC7539 A.3.
+ */
+
+#define POLY1305_TEST_VECTORS	11
+
+static struct hash_testvec poly1305_tv_template[] = {
+	{ /* Test Vector #1 */
+		.plaintext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize		= 96,
+		.digest		= "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, { /* Test Vector #2 */
+		.plaintext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70"
+				  "\xf0\xef\xca\x96\x22\x7a\x86\x3e"
+				  "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+				  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+				  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+				  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+				  "\x64\x65\x64\x20\x62\x79\x20\x74"
+				  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+				  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+				  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+				  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+				  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+				  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+				  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+				  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+				  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+				  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+				  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+				  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+				  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+				  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+				  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+				  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+				  "\x45\x54\x46\x20\x61\x63\x74\x69"
+				  "\x76\x69\x74\x79\x20\x69\x73\x20"
+				  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+				  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+				  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+				  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+				  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+				  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+				  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+				  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+				  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+				  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+				  "\x54\x46\x20\x73\x65\x73\x73\x69"
+				  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+				  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+				  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+				  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+				  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+				  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+				  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+				  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+				  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+				  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+				  "\x20\x77\x68\x69\x63\x68\x20\x61"
+				  "\x72\x65\x20\x61\x64\x64\x72\x65"
+				  "\x73\x73\x65\x64\x20\x74\x6f",
+		.psize		= 407,
+		.digest		= "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70"
+				  "\xf0\xef\xca\x96\x22\x7a\x86\x3e",
+	}, { /* Test Vector #3 */
+		.plaintext	= "\x36\xe5\xf6\xb5\xc5\xe0\x60\x70"
+				  "\xf0\xef\xca\x96\x22\x7a\x86\x3e"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+				  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+				  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+				  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+				  "\x64\x65\x64\x20\x62\x79\x20\x74"
+				  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+				  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+				  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+				  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+				  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+				  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+				  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+				  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+				  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+				  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+				  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+				  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+				  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+				  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+				  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+				  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+				  "\x45\x54\x46\x20\x61\x63\x74\x69"
+				  "\x76\x69\x74\x79\x20\x69\x73\x20"
+				  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+				  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+				  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+				  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+				  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+				  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+				  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+				  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+				  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+				  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+				  "\x54\x46\x20\x73\x65\x73\x73\x69"
+				  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+				  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+				  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+				  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+				  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+				  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+				  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+				  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+				  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+				  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+				  "\x20\x77\x68\x69\x63\x68\x20\x61"
+				  "\x72\x65\x20\x61\x64\x64\x72\x65"
+				  "\x73\x73\x65\x64\x20\x74\x6f",
+		.psize		= 407,
+		.digest		= "\xf3\x47\x7e\x7c\xd9\x54\x17\xaf"
+				  "\x89\xa6\xb8\x79\x4c\x31\x0c\xf0",
+	}, { /* Test Vector #4 */
+		.plaintext	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+				  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+				  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+				  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0"
+				  "\x27\x54\x77\x61\x73\x20\x62\x72"
+				  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+				  "\x6e\x64\x20\x74\x68\x65\x20\x73"
+				  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+				  "\x76\x65\x73\x0a\x44\x69\x64\x20"
+				  "\x67\x79\x72\x65\x20\x61\x6e\x64"
+				  "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+				  "\x69\x6e\x20\x74\x68\x65\x20\x77"
+				  "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+				  "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+				  "\x65\x72\x65\x20\x74\x68\x65\x20"
+				  "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+				  "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+				  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+				  "\x72\x61\x74\x68\x73\x20\x6f\x75"
+				  "\x74\x67\x72\x61\x62\x65\x2e",
+		.psize		= 159,
+		.digest		= "\x45\x41\x66\x9a\x7e\xaa\xee\x61"
+				  "\xe7\x08\xdc\x7c\xbc\xc5\xeb\x62",
+	}, { /* Test Vector #5 */
+		.plaintext	= "\x02\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff",
+		.psize		= 48,
+		.digest		= "\x03\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, { /* Test Vector #6 */
+		.plaintext	= "\x02\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\x02\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize		= 48,
+		.digest		= "\x03\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, { /* Test Vector #7 */
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xf0\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\x11\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize		= 80,
+		.digest		= "\x05\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, { /* Test Vector #8 */
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xfb\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
+				  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
+				  "\x01\x01\x01\x01\x01\x01\x01\x01"
+				  "\x01\x01\x01\x01\x01\x01\x01\x01",
+		.psize		= 80,
+		.digest		= "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	}, { /* Test Vector #9 */
+		.plaintext	= "\x02\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xfd\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff",
+		.psize		= 48,
+		.digest		= "\xfa\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff",
+	}, { /* Test Vector #10 */
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x04\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xe3\x35\x94\xd7\x50\x5e\x43\xb9"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x33\x94\xd7\x50\x5e\x43\x79\xcd"
+				  "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize		= 96,
+		.digest		= "\x14\x00\x00\x00\x00\x00\x00\x00"
+				  "\x55\x00\x00\x00\x00\x00\x00\x00",
+	}, { /* Test Vector #11 */
+		.plaintext	= "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x04\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\xe3\x35\x94\xd7\x50\x5e\x43\xb9"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x33\x94\xd7\x50\x5e\x43\x79\xcd"
+				  "\x01\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.psize		= 80,
+		.digest		= "\x13\x00\x00\x00\x00\x00\x00\x00"
+				  "\x00\x00\x00\x00\x00\x00\x00\x00",
+	},
+};
+
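A note on reading the Poly1305 vectors above: the poly1305 shash takes no separate setkey, so each .plaintext begins with the 32-byte one-time key and every .psize is 32 bytes larger than the RFC7539 message length (407 = 375 + 32 for Test Vector #2, 159 = 127 + 32 for #4, and so on); vectors #5-#11 exercise the RFC's arithmetic corner cases for carry propagation and partial reduction. A minimal sketch of checking one such vector through the shash API follows; the helper name is ours and error handling is trimmed.

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/errno.h>
	#include <linux/string.h>

	/* Sketch: digest one poly1305 test vector.  The 32-byte key is the
	 * head of @ptext, so no setkey call is needed. */
	static int check_poly1305_tv(const u8 *ptext, unsigned int psize,
				     const u8 *expect)
	{
		struct crypto_shash *tfm;
		u8 out[16];
		int err;

		tfm = crypto_alloc_shash("poly1305", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		do {
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;	/* synchronous, may not sleep */
			err = crypto_shash_digest(desc, ptext, psize, out);
		} while (0);

		crypto_free_shash(tfm);
		if (err)
			return err;
		return memcmp(out, expect, 16) ? -EINVAL : 0;
	}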
 /*
  * DES test vectors.
  */
@@ -3018,7 +3528,7 @@ static struct cipher_testvec des_enc_tv_template[] = {
 			  "\xb4\x99\x26\xf7\x1f\xe1\xd4\x90",
 		.rlen	= 24,
 	}, { /* Weak key */
-		.fail	= 1,
+		.fail	= true,
 		.wk	= 1,
 		.key	= "\x01\x01\x01\x01\x01\x01\x01\x01",
 		.klen	= 8,
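The .fail change above (1 to true) is part of a testvec cleanup converting the flag fields to bool; the vector itself is still expected to be rejected, because the all-0x01 key is one of DES's four weak keys (keys whose key schedule makes encryption self-inverse). As a standalone reference sketch, assuming nothing about the kernel's own check (which lives in the des_generic setkey path and fires when CRYPTO_TFM_REQ_WEAK_KEY is set):

	#include <stdbool.h>
	#include <string.h>

	/* The four DES weak keys, odd-parity bits included. */
	static const unsigned char des_weak_keys[4][8] = {
		{ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 },
		{ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe },
		{ 0xe0, 0xe0, 0xe0, 0xe0, 0xf1, 0xf1, 0xf1, 0xf1 },
		{ 0x1f, 0x1f, 0x1f, 0x1f, 0x0e, 0x0e, 0x0e, 0x0e },
	};

	static bool des_key_is_weak(const unsigned char key[8])
	{
		size_t i;

		for (i = 0; i < 4; i++)
			if (!memcmp(key, des_weak_keys[i], 8))
				return true;
		return false;
	}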
@@ -13629,8 +14139,8 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = {
 #define AES_CTR_3686_DEC_TEST_VECTORS 6
 #define AES_GCM_ENC_TEST_VECTORS 9
 #define AES_GCM_DEC_TEST_VECTORS 8
-#define AES_GCM_4106_ENC_TEST_VECTORS 7
-#define AES_GCM_4106_DEC_TEST_VECTORS 7
+#define AES_GCM_4106_ENC_TEST_VECTORS 23
+#define AES_GCM_4106_DEC_TEST_VECTORS 23
 #define AES_GCM_4543_ENC_TEST_VECTORS 1
 #define AES_GCM_4543_DEC_TEST_VECTORS 2
 #define AES_CCM_ENC_TEST_VECTORS 8
@@ -19789,6 +20299,428 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
 			  "\x37\x08\x1C\xCF\xBA\x5D\x71\x46"
 			  "\x80\x72\xB0\x4C\x82\x0D\x60\x3C",
 		.rlen	= 208,
+	}, { /* From draft-mcgrew-gcm-test-01 */
+		.key	= "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+			  "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+			  "\x2E\x44\x3B\x68",
+		.klen	= 20,
+		.iv	= "\x49\x56\xED\x7E\x3B\x24\x4C\xFE",
+		.input	= "\x45\x00\x00\x48\x69\x9A\x00\x00"
+			  "\x80\x11\x4D\xB7\xC0\xA8\x01\x02"
+			  "\xC0\xA8\x01\x01\x0A\x9B\xF1\x56"
+			  "\x38\xD3\x01\x00\x00\x01\x00\x00"
+			  "\x00\x00\x00\x00\x04\x5F\x73\x69"
+			  "\x70\x04\x5F\x75\x64\x70\x03\x73"
+			  "\x69\x70\x09\x63\x79\x62\x65\x72"
+			  "\x63\x69\x74\x79\x02\x64\x6B\x00"
+			  "\x00\x21\x00\x01\x01\x02\x02\x01",
+		.ilen	= 72,
+		.assoc	= "\x00\x00\x43\x21\x87\x65\x43\x21"
+			  "\x00\x00\x00\x00",
+		.alen	= 12,
+		.result	= "\xFE\xCF\x53\x7E\x72\x9D\x5B\x07"
+			  "\xDC\x30\xDF\x52\x8D\xD2\x2B\x76"
+			  "\x8D\x1B\x98\x73\x66\x96\xA6\xFD"
+			  "\x34\x85\x09\xFA\x13\xCE\xAC\x34"
+			  "\xCF\xA2\x43\x6F\x14\xA3\xF3\xCF"
+			  "\x65\x92\x5B\xF1\xF4\xA1\x3C\x5D"
+			  "\x15\xB2\x1E\x18\x84\xF5\xFF\x62"
+			  "\x47\xAE\xAB\xB7\x86\xB9\x3B\xCE"
+			  "\x61\xBC\x17\xD7\x68\xFD\x97\x32"
+			  "\x45\x90\x18\x14\x8F\x6C\xBE\x72"
+			  "\x2F\xD0\x47\x96\x56\x2D\xFD\xB4",
+		.rlen	= 88,
+	}, {
+		.key	= "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+			  "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+			  "\xCA\xFE\xBA\xBE",
+		.klen	= 20,
+		.iv	= "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+		.input	= "\x45\x00\x00\x3E\x69\x8F\x00\x00"
+			  "\x80\x11\x4D\xCC\xC0\xA8\x01\x02"
+			  "\xC0\xA8\x01\x01\x0A\x98\x00\x35"
+			  "\x00\x2A\x23\x43\xB2\xD0\x01\x00"
+			  "\x00\x01\x00\x00\x00\x00\x00\x00"
+			  "\x03\x73\x69\x70\x09\x63\x79\x62"
+			  "\x65\x72\x63\x69\x74\x79\x02\x64"
+			  "\x6B\x00\x00\x01\x00\x01\x00\x01",
+		.ilen	= 64,
+		.assoc	= "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
+		.alen	= 8,
+		.result	= "\xDE\xB2\x2C\xD9\xB0\x7C\x72\xC1"
+			  "\x6E\x3A\x65\xBE\xEB\x8D\xF3\x04"
+			  "\xA5\xA5\x89\x7D\x33\xAE\x53\x0F"
+			  "\x1B\xA7\x6D\x5D\x11\x4D\x2A\x5C"
+			  "\x3D\xE8\x18\x27\xC1\x0E\x9A\x4F"
+			  "\x51\x33\x0D\x0E\xEC\x41\x66\x42"
+			  "\xCF\xBB\x85\xA5\xB4\x7E\x48\xA4"
+			  "\xEC\x3B\x9B\xA9\x5D\x91\x8B\xD1"
+			  "\x83\xB7\x0D\x3A\xA8\xBC\x6E\xE4"
+			  "\xC3\x09\xE9\xD8\x5A\x41\xAD\x4A",
+		.rlen	= 80,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\x11\x22\x33\x44",
+		.klen	= 36,
+		.iv	= "\x01\x02\x03\x04\x05\x06\x07\x08",
+		.input	= "\x45\x00\x00\x30\x69\xA6\x40\x00"
+			  "\x80\x06\x26\x90\xC0\xA8\x01\x02"
+			  "\x93\x89\x15\x5E\x0A\x9E\x00\x8B"
+			  "\x2D\xC5\x7E\xE0\x00\x00\x00\x00"
+			  "\x70\x02\x40\x00\x20\xBF\x00\x00"
+			  "\x02\x04\x05\xB4\x01\x01\x04\x02"
+			  "\x01\x02\x02\x01",
+		.ilen	= 52,
+		.assoc	= "\x4A\x2C\xBF\xE3\x00\x00\x00\x02",
+		.alen	= 8,
+		.result	= "\xFF\x42\x5C\x9B\x72\x45\x99\xDF"
+			  "\x7A\x3B\xCD\x51\x01\x94\xE0\x0D"
+			  "\x6A\x78\x10\x7F\x1B\x0B\x1C\xBF"
+			  "\x06\xEF\xAE\x9D\x65\xA5\xD7\x63"
+			  "\x74\x8A\x63\x79\x85\x77\x1D\x34"
+			  "\x7F\x05\x45\x65\x9F\x14\xE9\x9D"
+			  "\xEF\x84\x2D\x8E\xB3\x35\xF4\xEE"
+			  "\xCF\xDB\xF8\x31\x82\x4B\x4C\x49"
+			  "\x15\x95\x6C\x96",
+		.rlen	= 68,
+	}, {
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00",
+		.klen	= 20,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x45\x00\x00\x3C\x99\xC5\x00\x00"
+			  "\x80\x01\xCB\x7A\x40\x67\x93\x18"
+			  "\x01\x01\x01\x01\x08\x00\x07\x5C"
+			  "\x02\x00\x44\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x75\x76\x77\x61\x62\x63\x64\x65"
+			  "\x66\x67\x68\x69\x01\x02\x02\x01",
+		.ilen	= 64,
+		.assoc	= "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.alen	= 8,
+		.result	= "\x46\x88\xDA\xF2\xF9\x73\xA3\x92"
+			  "\x73\x29\x09\xC3\x31\xD5\x6D\x60"
+			  "\xF6\x94\xAB\xAA\x41\x4B\x5E\x7F"
+			  "\xF5\xFD\xCD\xFF\xF5\xE9\xA2\x84"
+			  "\x45\x64\x76\x49\x27\x19\xFF\xB6"
+			  "\x4D\xE7\xD9\xDC\xA1\xE1\xD8\x94"
+			  "\xBC\x3B\xD5\x78\x73\xED\x4D\x18"
+			  "\x1D\x19\xD4\xD5\xC8\xC1\x8A\xF3"
+			  "\xF8\x21\xD4\x96\xEE\xB0\x96\xE9"
+			  "\x8A\xD2\xB6\x9E\x47\x99\xC7\x1D",
+		.rlen	= 80,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.input	= "\x45\x00\x00\x3C\x99\xC3\x00\x00"
+			  "\x80\x01\xCB\x7C\x40\x67\x93\x18"
+			  "\x01\x01\x01\x01\x08\x00\x08\x5C"
+			  "\x02\x00\x43\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x75\x76\x77\x61\x62\x63\x64\x65"
+			  "\x66\x67\x68\x69\x01\x02\x02\x01",
+		.ilen	= 64,
+		.assoc	= "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.result	= "\xFB\xA2\xCA\xA4\x85\x3C\xF9\xF0"
+			  "\xF2\x2C\xB1\x0D\x86\xDD\x83\xB0"
+			  "\xFE\xC7\x56\x91\xCF\x1A\x04\xB0"
+			  "\x0D\x11\x38\xEC\x9C\x35\x79\x17"
+			  "\x65\xAC\xBD\x87\x01\xAD\x79\x84"
+			  "\x5B\xF9\xFE\x3F\xBA\x48\x7B\xC9"
+			  "\x17\x55\xE6\x66\x2B\x4C\x8D\x0D"
+			  "\x1F\x5E\x22\x73\x95\x30\x32\x0A"
+			  "\xE0\xD7\x31\xCC\x97\x8E\xCA\xFA"
+			  "\xEA\xE8\x8F\x00\xE8\x0D\x6E\x48",
+		.rlen	= 80,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.input	= "\x45\x00\x00\x1C\x42\xA2\x00\x00"
+			  "\x80\x01\x44\x1F\x40\x67\x93\xB6"
+			  "\xE0\x00\x00\x02\x0A\x00\xF5\xFF"
+			  "\x01\x02\x02\x01",
+		.ilen	= 28,
+		.assoc	= "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.result	= "\xFB\xA2\xCA\x84\x5E\x5D\xF9\xF0"
+			  "\xF2\x2C\x3E\x6E\x86\xDD\x83\x1E"
+			  "\x1F\xC6\x57\x92\xCD\x1A\xF9\x13"
+			  "\x0E\x13\x79\xED\x36\x9F\x07\x1F"
+			  "\x35\xE0\x34\xBE\x95\xF1\x12\xE4"
+			  "\xE7\xD0\x5D\x35",
+		.rlen	= 44,
+	}, {
+		.key	= "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+			  "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+			  "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+			  "\xCA\xFE\xBA\xBE",
+		.klen	= 28,
+		.iv	= "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+		.input	= "\x45\x00\x00\x28\xA4\xAD\x40\x00"
+			  "\x40\x06\x78\x80\x0A\x01\x03\x8F"
+			  "\x0A\x01\x06\x12\x80\x23\x06\xB8"
+			  "\xCB\x71\x26\x02\xDD\x6B\xB0\x3E"
+			  "\x50\x10\x16\xD0\x75\x68\x00\x01",
+		.ilen	= 40,
+		.assoc	= "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
+		.alen	= 8,
+		.result	= "\xA5\xB1\xF8\x06\x60\x29\xAE\xA4"
+			  "\x0E\x59\x8B\x81\x22\xDE\x02\x42"
+			  "\x09\x38\xB3\xAB\x33\xF8\x28\xE6"
+			  "\x87\xB8\x85\x8B\x5B\xFB\xDB\xD0"
+			  "\x31\x5B\x27\x45\x21\x44\xCC\x77"
+			  "\x95\x45\x7B\x96\x52\x03\x7F\x53"
+			  "\x18\x02\x7B\x5B\x4C\xD7\xA6\x36",
+		.rlen	= 56,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xDE\xCA\xF8\x88",
+		.klen	= 20,
+		.iv	= "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+		.input	= "\x45\x00\x00\x49\x33\xBA\x00\x00"
+			  "\x7F\x11\x91\x06\xC3\xFB\x1D\x10"
+			  "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+			  "\x00\x35\xDD\x7B\x80\x03\x02\xD5"
+			  "\x00\x00\x4E\x20\x00\x1E\x8C\x18"
+			  "\xD7\x5B\x81\xDC\x91\xBA\xA0\x47"
+			  "\x6B\x91\xB9\x24\xB2\x80\x38\x9D"
+			  "\x92\xC9\x63\xBA\xC0\x46\xEC\x95"
+			  "\x9B\x62\x66\xC0\x47\x22\xB1\x49"
+			  "\x23\x01\x01\x01",
+		.ilen	= 76,
+		.assoc	= "\x00\x00\x01\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01",
+		.alen	= 12,
+		.result	= "\x18\xA6\xFD\x42\xF7\x2C\xBF\x4A"
+			  "\xB2\xA2\xEA\x90\x1F\x73\xD8\x14"
+			  "\xE3\xE7\xF2\x43\xD9\x54\x12\xE1"
+			  "\xC3\x49\xC1\xD2\xFB\xEC\x16\x8F"
+			  "\x91\x90\xFE\xEB\xAF\x2C\xB0\x19"
+			  "\x84\xE6\x58\x63\x96\x5D\x74\x72"
+			  "\xB7\x9D\xA3\x45\xE0\xE7\x80\x19"
+			  "\x1F\x0D\x2F\x0E\x0F\x49\x6C\x22"
+			  "\x6F\x21\x27\xB2\x7D\xB3\x57\x24"
+			  "\xE7\x84\x5D\x68\x65\x1F\x57\xE6"
+			  "\x5F\x35\x4F\x75\xFF\x17\x01\x57"
+			  "\x69\x62\x34\x36",
+		.rlen	= 92,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\x73\x61\x6C\x74",
+		.klen	= 36,
+		.iv	= "\x61\x6E\x64\x01\x69\x76\x65\x63",
+		.input	= "\x45\x08\x00\x28\x73\x2C\x00\x00"
+			  "\x40\x06\xE9\xF9\x0A\x01\x06\x12"
+			  "\x0A\x01\x03\x8F\x06\xB8\x80\x23"
+			  "\xDD\x6B\xAF\xBE\xCB\x71\x26\x02"
+			  "\x50\x10\x1F\x64\x6D\x54\x00\x01",
+		.ilen	= 40,
+		.assoc	= "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+			  "\xDD\x0D\xB9\x9B",
+		.alen	= 12,
+		.result	= "\xF2\xD6\x9E\xCD\xBD\x5A\x0D\x5B"
+			  "\x8D\x5E\xF3\x8B\xAD\x4D\xA5\x8D"
+			  "\x1F\x27\x8F\xDE\x98\xEF\x67\x54"
+			  "\x9D\x52\x4A\x30\x18\xD9\xA5\x7F"
+			  "\xF4\xD3\xA3\x1C\xE6\x73\x11\x9E"
+			  "\x45\x16\x26\xC2\x41\x57\x71\xE3"
+			  "\xB7\xEE\xBC\xA6\x14\xC8\x9B\x35",
+		.rlen	= 56,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.input	= "\x45\x00\x00\x49\x33\x3E\x00\x00"
+			  "\x7F\x11\x91\x82\xC3\xFB\x1D\x10"
+			  "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+			  "\x00\x35\xCB\x45\x80\x03\x02\x5B"
+			  "\x00\x00\x01\xE0\x00\x1E\x8C\x18"
+			  "\xD6\x57\x59\xD5\x22\x84\xA0\x35"
+			  "\x2C\x71\x47\x5C\x88\x80\x39\x1C"
+			  "\x76\x4D\x6E\x5E\xE0\x49\x6B\x32"
+			  "\x5A\xE2\x70\xC0\x38\x99\x49\x39"
+			  "\x15\x01\x01\x01",
+		.ilen	= 76,
+		.assoc	= "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.result	= "\xFB\xA2\xCA\xD1\x2F\xC1\xF9\xF0"
+			  "\x0D\x3C\xEB\xF3\x05\x41\x0D\xB8"
+			  "\x3D\x77\x84\xB6\x07\x32\x3D\x22"
+			  "\x0F\x24\xB0\xA9\x7D\x54\x18\x28"
+			  "\x00\xCA\xDB\x0F\x68\xD9\x9E\xF0"
+			  "\xE0\xC0\xC8\x9A\xE9\xBE\xA8\x88"
+			  "\x4E\x52\xD6\x5B\xC1\xAF\xD0\x74"
+			  "\x0F\x74\x24\x44\x74\x7B\x5B\x39"
+			  "\xAB\x53\x31\x63\xAA\xD4\x55\x0E"
+			  "\xE5\x16\x09\x75\xCD\xB6\x08\xC5"
+			  "\x76\x91\x89\x60\x97\x63\xB8\xE1"
+			  "\x8C\xAA\x81\xE2",
+		.rlen	= 92,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\x73\x61\x6C\x74",
+		.klen	= 36,
+		.iv	= "\x61\x6E\x64\x01\x69\x76\x65\x63",
+		.input	= "\x63\x69\x73\x63\x6F\x01\x72\x75"
+			  "\x6C\x65\x73\x01\x74\x68\x65\x01"
+			  "\x6E\x65\x74\x77\x65\x01\x64\x65"
+			  "\x66\x69\x6E\x65\x01\x74\x68\x65"
+			  "\x74\x65\x63\x68\x6E\x6F\x6C\x6F"
+			  "\x67\x69\x65\x73\x01\x74\x68\x61"
+			  "\x74\x77\x69\x6C\x6C\x01\x64\x65"
+			  "\x66\x69\x6E\x65\x74\x6F\x6D\x6F"
+			  "\x72\x72\x6F\x77\x01\x02\x02\x01",
+		.ilen	= 72,
+		.assoc	= "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+			  "\xDD\x0D\xB9\x9B",
+		.alen	= 12,
+		.result	= "\xD4\xB7\xED\x86\xA1\x77\x7F\x2E"
+			  "\xA1\x3D\x69\x73\xD3\x24\xC6\x9E"
+			  "\x7B\x43\xF8\x26\xFB\x56\x83\x12"
+			  "\x26\x50\x8B\xEB\xD2\xDC\xEB\x18"
+			  "\xD0\xA6\xDF\x10\xE5\x48\x7D\xF0"
+			  "\x74\x11\x3E\x14\xC6\x41\x02\x4E"
+			  "\x3E\x67\x73\xD9\x1A\x62\xEE\x42"
+			  "\x9B\x04\x3A\x10\xE3\xEF\xE6\xB0"
+			  "\x12\xA4\x93\x63\x41\x23\x64\xF8"
+			  "\xC0\xCA\xC5\x87\xF2\x49\xE5\x6B"
+			  "\x11\xE2\x4F\x30\xE4\x4C\xCC\x76",
+		.rlen	= 88,
+	}, {
+		.key	= "\x7D\x77\x3D\x00\xC1\x44\xC5\x25"
+			  "\xAC\x61\x9D\x18\xC8\x4A\x3F\x47"
+			  "\xD9\x66\x42\x67",
+		.klen	= 20,
+		.iv	= "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+		.input	= "\x01\x02\x02\x01",
+		.ilen	= 4,
+		.assoc	= "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF",
+		.alen	= 8,
+		.result	= "\x43\x7F\x86\x6B\xCB\x3F\x69\x9F"
+			  "\xE9\xB0\x82\x2B\xAC\x96\x1C\x45"
+			  "\x04\xBE\xF2\x70",
+		.rlen	= 20,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xDE\xCA\xF8\x88",
+		.klen	= 20,
+		.iv	= "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+		.input	= "\x74\x6F\x01\x62\x65\x01\x6F\x72"
+			  "\x01\x6E\x6F\x74\x01\x74\x6F\x01"
+			  "\x62\x65\x00\x01",
+		.ilen	= 20,
+		.assoc	= "\x00\x00\x01\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01",
+		.alen	= 12,
+		.result	= "\x29\xC9\xFC\x69\xA1\x97\xD0\x38"
+			  "\xCC\xDD\x14\xE2\xDD\xFC\xAA\x05"
+			  "\x43\x33\x21\x64\x41\x25\x03\x52"
+			  "\x43\x03\xED\x3C\x6C\x5F\x28\x38"
+			  "\x43\xAF\x8C\x3E",
+		.rlen	= 36,
+	}, {
+		.key	= "\x6C\x65\x67\x61\x6C\x69\x7A\x65"
+			  "\x6D\x61\x72\x69\x6A\x75\x61\x6E"
+			  "\x61\x61\x6E\x64\x64\x6F\x69\x74"
+			  "\x62\x65\x66\x6F\x72\x65\x69\x61"
+			  "\x74\x75\x72\x6E",
+		.klen	= 36,
+		.iv	= "\x33\x30\x21\x69\x67\x65\x74\x6D",
+		.input	= "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+			  "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+			  "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+			  "\x02\x00\x07\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x01\x02\x02\x01",
+		.ilen	= 52,
+		.assoc	= "\x79\x6B\x69\x63\xFF\xFF\xFF\xFF"
+			  "\xFF\xFF\xFF\xFF",
+		.alen	= 12,
+		.result	= "\xF9\x7A\xB2\xAA\x35\x6D\x8E\xDC"
+			  "\xE1\x76\x44\xAC\x8C\x78\xE2\x5D"
+			  "\xD2\x4D\xED\xBB\x29\xEB\xF1\xB6"
+			  "\x4A\x27\x4B\x39\xB4\x9C\x3A\x86"
+			  "\x4C\xD3\xD7\x8C\xA4\xAE\x68\xA3"
+			  "\x2B\x42\x45\x8F\xB5\x7D\xBE\x82"
+			  "\x1D\xCC\x63\xB9\xD0\x93\x7B\xA2"
+			  "\x94\x5F\x66\x93\x68\x66\x1A\x32"
+			  "\x9F\xB4\xC0\x53",
+		.rlen	= 68,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.input	= "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+			  "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+			  "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+			  "\x02\x00\x07\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x01\x02\x02\x01",
+		.ilen	= 52,
+		.assoc	= "\x3F\x7E\xF6\x42\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.result	= "\xFB\xA2\xCA\xA8\xC6\xC5\xF9\xF0"
+			  "\xF2\x2C\xA5\x4A\x06\x12\x10\xAD"
+			  "\x3F\x6E\x57\x91\xCF\x1A\xCA\x21"
+			  "\x0D\x11\x7C\xEC\x9C\x35\x79\x17"
+			  "\x65\xAC\xBD\x87\x01\xAD\x79\x84"
+			  "\x5B\xF9\xFE\x3F\xBA\x48\x7B\xC9"
+			  "\x63\x21\x93\x06\x84\xEE\xCA\xDB"
+			  "\x56\x91\x25\x46\xE7\xA9\x5C\x97"
+			  "\x40\xD7\xCB\x05",
+		.rlen	= 68,
+	}, {
+		.key	= "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+			  "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+			  "\x22\x43\x3C\x64",
+		.klen	= 20,
+		.iv	= "\x48\x55\xEC\x7D\x3A\x23\x4B\xFD",
+		.input	= "\x08\x00\xC6\xCD\x02\x00\x07\x00"
+			  "\x61\x62\x63\x64\x65\x66\x67\x68"
+			  "\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70"
+			  "\x71\x72\x73\x74\x01\x02\x02\x01",
+		.ilen	= 32,
+		.assoc	= "\x00\x00\x43\x21\x87\x65\x43\x21"
+			  "\x00\x00\x00\x07",
+		.alen	= 12,
+		.result	= "\x74\x75\x2E\x8A\xEB\x5D\x87\x3C"
+			  "\xD7\xC0\xF4\xAC\xC3\x6C\x4B\xFF"
+			  "\x84\xB7\xD7\xB9\x8F\x0C\xA8\xB6"
+			  "\xAC\xDA\x68\x94\xBC\x61\x90\x69"
+			  "\xEF\x9C\xBC\x28\xFE\x1B\x56\xA7"
+			  "\xC4\xE0\xD5\x8C\x86\xCD\x2B\xC0",
+		.rlen	= 48,
 	}
 };
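A reading aid for the rfc4106 vectors above (and the matching decryption set below): the .klen values of 20, 28 and 36 are not odd AES key sizes. Per RFC 4106 the last four key bytes are an implicit salt, and the 96-bit GCM nonce is that salt followed by the 8-byte explicit .iv. A minimal sketch of the assembly, with an illustrative helper name:

	#include <linux/string.h>
	#include <linux/types.h>

	/* Sketch: build the 12-byte GCM nonce for rfc4106(gcm(aes)).
	 * The AES key proper is key[0..klen-5]; the last 4 bytes are salt. */
	static void rfc4106_build_nonce(u8 nonce[12], const u8 *key,
					unsigned int klen, const u8 iv[8])
	{
		memcpy(nonce, key + klen - 4, 4);	/* implicit salt */
		memcpy(nonce + 4, iv, 8);		/* explicit per-packet IV */
	}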
 
@@ -19964,7 +20896,428 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                           "\xff\xff\xff\xff\xff\xff\xff\xff"
                           "\xff\xff\xff\xff\xff\xff\xff\xff",
                 .rlen   = 192,
-
+	}, {
+		.key	= "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+			  "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+			  "\x2E\x44\x3B\x68",
+		.klen	= 20,
+		.iv	= "\x49\x56\xED\x7E\x3B\x24\x4C\xFE",
+		.result	= "\x45\x00\x00\x48\x69\x9A\x00\x00"
+			  "\x80\x11\x4D\xB7\xC0\xA8\x01\x02"
+			  "\xC0\xA8\x01\x01\x0A\x9B\xF1\x56"
+			  "\x38\xD3\x01\x00\x00\x01\x00\x00"
+			  "\x00\x00\x00\x00\x04\x5F\x73\x69"
+			  "\x70\x04\x5F\x75\x64\x70\x03\x73"
+			  "\x69\x70\x09\x63\x79\x62\x65\x72"
+			  "\x63\x69\x74\x79\x02\x64\x6B\x00"
+			  "\x00\x21\x00\x01\x01\x02\x02\x01",
+		.rlen	= 72,
+		.assoc	= "\x00\x00\x43\x21\x87\x65\x43\x21"
+			  "\x00\x00\x00\x00",
+		.alen	= 12,
+		.input	= "\xFE\xCF\x53\x7E\x72\x9D\x5B\x07"
+			  "\xDC\x30\xDF\x52\x8D\xD2\x2B\x76"
+			  "\x8D\x1B\x98\x73\x66\x96\xA6\xFD"
+			  "\x34\x85\x09\xFA\x13\xCE\xAC\x34"
+			  "\xCF\xA2\x43\x6F\x14\xA3\xF3\xCF"
+			  "\x65\x92\x5B\xF1\xF4\xA1\x3C\x5D"
+			  "\x15\xB2\x1E\x18\x84\xF5\xFF\x62"
+			  "\x47\xAE\xAB\xB7\x86\xB9\x3B\xCE"
+			  "\x61\xBC\x17\xD7\x68\xFD\x97\x32"
+			  "\x45\x90\x18\x14\x8F\x6C\xBE\x72"
+			  "\x2F\xD0\x47\x96\x56\x2D\xFD\xB4",
+		.ilen	= 88,
+	}, {
+		.key	= "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+			  "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+			  "\xCA\xFE\xBA\xBE",
+		.klen	= 20,
+		.iv	= "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+		.result	= "\x45\x00\x00\x3E\x69\x8F\x00\x00"
+			  "\x80\x11\x4D\xCC\xC0\xA8\x01\x02"
+			  "\xC0\xA8\x01\x01\x0A\x98\x00\x35"
+			  "\x00\x2A\x23\x43\xB2\xD0\x01\x00"
+			  "\x00\x01\x00\x00\x00\x00\x00\x00"
+			  "\x03\x73\x69\x70\x09\x63\x79\x62"
+			  "\x65\x72\x63\x69\x74\x79\x02\x64"
+			  "\x6B\x00\x00\x01\x00\x01\x00\x01",
+		.rlen	= 64,
+		.assoc	= "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
+		.alen	= 8,
+		.input	= "\xDE\xB2\x2C\xD9\xB0\x7C\x72\xC1"
+			  "\x6E\x3A\x65\xBE\xEB\x8D\xF3\x04"
+			  "\xA5\xA5\x89\x7D\x33\xAE\x53\x0F"
+			  "\x1B\xA7\x6D\x5D\x11\x4D\x2A\x5C"
+			  "\x3D\xE8\x18\x27\xC1\x0E\x9A\x4F"
+			  "\x51\x33\x0D\x0E\xEC\x41\x66\x42"
+			  "\xCF\xBB\x85\xA5\xB4\x7E\x48\xA4"
+			  "\xEC\x3B\x9B\xA9\x5D\x91\x8B\xD1"
+			  "\x83\xB7\x0D\x3A\xA8\xBC\x6E\xE4"
+			  "\xC3\x09\xE9\xD8\x5A\x41\xAD\x4A",
+		.ilen	= 80,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\x11\x22\x33\x44",
+		.klen	= 36,
+		.iv	= "\x01\x02\x03\x04\x05\x06\x07\x08",
+		.result	= "\x45\x00\x00\x30\x69\xA6\x40\x00"
+			  "\x80\x06\x26\x90\xC0\xA8\x01\x02"
+			  "\x93\x89\x15\x5E\x0A\x9E\x00\x8B"
+			  "\x2D\xC5\x7E\xE0\x00\x00\x00\x00"
+			  "\x70\x02\x40\x00\x20\xBF\x00\x00"
+			  "\x02\x04\x05\xB4\x01\x01\x04\x02"
+			  "\x01\x02\x02\x01",
+		.rlen	= 52,
+		.assoc	= "\x4A\x2C\xBF\xE3\x00\x00\x00\x02",
+		.alen	= 8,
+		.input	= "\xFF\x42\x5C\x9B\x72\x45\x99\xDF"
+			  "\x7A\x3B\xCD\x51\x01\x94\xE0\x0D"
+			  "\x6A\x78\x10\x7F\x1B\x0B\x1C\xBF"
+			  "\x06\xEF\xAE\x9D\x65\xA5\xD7\x63"
+			  "\x74\x8A\x63\x79\x85\x77\x1D\x34"
+			  "\x7F\x05\x45\x65\x9F\x14\xE9\x9D"
+			  "\xEF\x84\x2D\x8E\xB3\x35\xF4\xEE"
+			  "\xCF\xDB\xF8\x31\x82\x4B\x4C\x49"
+			  "\x15\x95\x6C\x96",
+		.ilen	= 68,
+	}, {
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00",
+		.klen	= 20,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.result	= "\x45\x00\x00\x3C\x99\xC5\x00\x00"
+			  "\x80\x01\xCB\x7A\x40\x67\x93\x18"
+			  "\x01\x01\x01\x01\x08\x00\x07\x5C"
+			  "\x02\x00\x44\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x75\x76\x77\x61\x62\x63\x64\x65"
+			  "\x66\x67\x68\x69\x01\x02\x02\x01",
+		.rlen	= 64,
+		.assoc	= "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.alen	= 8,
+		.input	= "\x46\x88\xDA\xF2\xF9\x73\xA3\x92"
+			  "\x73\x29\x09\xC3\x31\xD5\x6D\x60"
+			  "\xF6\x94\xAB\xAA\x41\x4B\x5E\x7F"
+			  "\xF5\xFD\xCD\xFF\xF5\xE9\xA2\x84"
+			  "\x45\x64\x76\x49\x27\x19\xFF\xB6"
+			  "\x4D\xE7\xD9\xDC\xA1\xE1\xD8\x94"
+			  "\xBC\x3B\xD5\x78\x73\xED\x4D\x18"
+			  "\x1D\x19\xD4\xD5\xC8\xC1\x8A\xF3"
+			  "\xF8\x21\xD4\x96\xEE\xB0\x96\xE9"
+			  "\x8A\xD2\xB6\x9E\x47\x99\xC7\x1D",
+		.ilen	= 80,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.result	= "\x45\x00\x00\x3C\x99\xC3\x00\x00"
+			  "\x80\x01\xCB\x7C\x40\x67\x93\x18"
+			  "\x01\x01\x01\x01\x08\x00\x08\x5C"
+			  "\x02\x00\x43\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x75\x76\x77\x61\x62\x63\x64\x65"
+			  "\x66\x67\x68\x69\x01\x02\x02\x01",
+		.rlen	= 64,
+		.assoc	= "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.input	= "\xFB\xA2\xCA\xA4\x85\x3C\xF9\xF0"
+			  "\xF2\x2C\xB1\x0D\x86\xDD\x83\xB0"
+			  "\xFE\xC7\x56\x91\xCF\x1A\x04\xB0"
+			  "\x0D\x11\x38\xEC\x9C\x35\x79\x17"
+			  "\x65\xAC\xBD\x87\x01\xAD\x79\x84"
+			  "\x5B\xF9\xFE\x3F\xBA\x48\x7B\xC9"
+			  "\x17\x55\xE6\x66\x2B\x4C\x8D\x0D"
+			  "\x1F\x5E\x22\x73\x95\x30\x32\x0A"
+			  "\xE0\xD7\x31\xCC\x97\x8E\xCA\xFA"
+			  "\xEA\xE8\x8F\x00\xE8\x0D\x6E\x48",
+		.ilen	= 80,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.result	= "\x45\x00\x00\x1C\x42\xA2\x00\x00"
+			  "\x80\x01\x44\x1F\x40\x67\x93\xB6"
+			  "\xE0\x00\x00\x02\x0A\x00\xF5\xFF"
+			  "\x01\x02\x02\x01",
+		.rlen	= 28,
+		.assoc	= "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.input	= "\xFB\xA2\xCA\x84\x5E\x5D\xF9\xF0"
+			  "\xF2\x2C\x3E\x6E\x86\xDD\x83\x1E"
+			  "\x1F\xC6\x57\x92\xCD\x1A\xF9\x13"
+			  "\x0E\x13\x79\xED\x36\x9F\x07\x1F"
+			  "\x35\xE0\x34\xBE\x95\xF1\x12\xE4"
+			  "\xE7\xD0\x5D\x35",
+		.ilen	= 44,
+	}, {
+		.key	= "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+			  "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+			  "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+			  "\xCA\xFE\xBA\xBE",
+		.klen	= 28,
+		.iv	= "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+		.result	= "\x45\x00\x00\x28\xA4\xAD\x40\x00"
+			  "\x40\x06\x78\x80\x0A\x01\x03\x8F"
+			  "\x0A\x01\x06\x12\x80\x23\x06\xB8"
+			  "\xCB\x71\x26\x02\xDD\x6B\xB0\x3E"
+			  "\x50\x10\x16\xD0\x75\x68\x00\x01",
+		.rlen	= 40,
+		.assoc	= "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
+		.alen	= 8,
+		.input	= "\xA5\xB1\xF8\x06\x60\x29\xAE\xA4"
+			  "\x0E\x59\x8B\x81\x22\xDE\x02\x42"
+			  "\x09\x38\xB3\xAB\x33\xF8\x28\xE6"
+			  "\x87\xB8\x85\x8B\x5B\xFB\xDB\xD0"
+			  "\x31\x5B\x27\x45\x21\x44\xCC\x77"
+			  "\x95\x45\x7B\x96\x52\x03\x7F\x53"
+			  "\x18\x02\x7B\x5B\x4C\xD7\xA6\x36",
+		.ilen	= 56,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xDE\xCA\xF8\x88",
+		.klen	= 20,
+		.iv	= "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+		.result	= "\x45\x00\x00\x49\x33\xBA\x00\x00"
+			  "\x7F\x11\x91\x06\xC3\xFB\x1D\x10"
+			  "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+			  "\x00\x35\xDD\x7B\x80\x03\x02\xD5"
+			  "\x00\x00\x4E\x20\x00\x1E\x8C\x18"
+			  "\xD7\x5B\x81\xDC\x91\xBA\xA0\x47"
+			  "\x6B\x91\xB9\x24\xB2\x80\x38\x9D"
+			  "\x92\xC9\x63\xBA\xC0\x46\xEC\x95"
+			  "\x9B\x62\x66\xC0\x47\x22\xB1\x49"
+			  "\x23\x01\x01\x01",
+		.rlen	= 76,
+		.assoc	= "\x00\x00\x01\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01",
+		.alen	= 12,
+		.input	= "\x18\xA6\xFD\x42\xF7\x2C\xBF\x4A"
+			  "\xB2\xA2\xEA\x90\x1F\x73\xD8\x14"
+			  "\xE3\xE7\xF2\x43\xD9\x54\x12\xE1"
+			  "\xC3\x49\xC1\xD2\xFB\xEC\x16\x8F"
+			  "\x91\x90\xFE\xEB\xAF\x2C\xB0\x19"
+			  "\x84\xE6\x58\x63\x96\x5D\x74\x72"
+			  "\xB7\x9D\xA3\x45\xE0\xE7\x80\x19"
+			  "\x1F\x0D\x2F\x0E\x0F\x49\x6C\x22"
+			  "\x6F\x21\x27\xB2\x7D\xB3\x57\x24"
+			  "\xE7\x84\x5D\x68\x65\x1F\x57\xE6"
+			  "\x5F\x35\x4F\x75\xFF\x17\x01\x57"
+			  "\x69\x62\x34\x36",
+		.ilen	= 92,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\x73\x61\x6C\x74",
+		.klen	= 36,
+		.iv	= "\x61\x6E\x64\x01\x69\x76\x65\x63",
+		.result	= "\x45\x08\x00\x28\x73\x2C\x00\x00"
+			  "\x40\x06\xE9\xF9\x0A\x01\x06\x12"
+			  "\x0A\x01\x03\x8F\x06\xB8\x80\x23"
+			  "\xDD\x6B\xAF\xBE\xCB\x71\x26\x02"
+			  "\x50\x10\x1F\x64\x6D\x54\x00\x01",
+		.rlen	= 40,
+		.assoc	= "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+			  "\xDD\x0D\xB9\x9B",
+		.alen	= 12,
+		.input	= "\xF2\xD6\x9E\xCD\xBD\x5A\x0D\x5B"
+			  "\x8D\x5E\xF3\x8B\xAD\x4D\xA5\x8D"
+			  "\x1F\x27\x8F\xDE\x98\xEF\x67\x54"
+			  "\x9D\x52\x4A\x30\x18\xD9\xA5\x7F"
+			  "\xF4\xD3\xA3\x1C\xE6\x73\x11\x9E"
+			  "\x45\x16\x26\xC2\x41\x57\x71\xE3"
+			  "\xB7\xEE\xBC\xA6\x14\xC8\x9B\x35",
+		.ilen	= 56,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.result	= "\x45\x00\x00\x49\x33\x3E\x00\x00"
+			  "\x7F\x11\x91\x82\xC3\xFB\x1D\x10"
+			  "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+			  "\x00\x35\xCB\x45\x80\x03\x02\x5B"
+			  "\x00\x00\x01\xE0\x00\x1E\x8C\x18"
+			  "\xD6\x57\x59\xD5\x22\x84\xA0\x35"
+			  "\x2C\x71\x47\x5C\x88\x80\x39\x1C"
+			  "\x76\x4D\x6E\x5E\xE0\x49\x6B\x32"
+			  "\x5A\xE2\x70\xC0\x38\x99\x49\x39"
+			  "\x15\x01\x01\x01",
+		.rlen	= 76,
+		.assoc	= "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.input	= "\xFB\xA2\xCA\xD1\x2F\xC1\xF9\xF0"
+			  "\x0D\x3C\xEB\xF3\x05\x41\x0D\xB8"
+			  "\x3D\x77\x84\xB6\x07\x32\x3D\x22"
+			  "\x0F\x24\xB0\xA9\x7D\x54\x18\x28"
+			  "\x00\xCA\xDB\x0F\x68\xD9\x9E\xF0"
+			  "\xE0\xC0\xC8\x9A\xE9\xBE\xA8\x88"
+			  "\x4E\x52\xD6\x5B\xC1\xAF\xD0\x74"
+			  "\x0F\x74\x24\x44\x74\x7B\x5B\x39"
+			  "\xAB\x53\x31\x63\xAA\xD4\x55\x0E"
+			  "\xE5\x16\x09\x75\xCD\xB6\x08\xC5"
+			  "\x76\x91\x89\x60\x97\x63\xB8\xE1"
+			  "\x8C\xAA\x81\xE2",
+		.ilen	= 92,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\x73\x61\x6C\x74",
+		.klen	= 36,
+		.iv	= "\x61\x6E\x64\x01\x69\x76\x65\x63",
+		.result	= "\x63\x69\x73\x63\x6F\x01\x72\x75"
+			  "\x6C\x65\x73\x01\x74\x68\x65\x01"
+			  "\x6E\x65\x74\x77\x65\x01\x64\x65"
+			  "\x66\x69\x6E\x65\x01\x74\x68\x65"
+			  "\x74\x65\x63\x68\x6E\x6F\x6C\x6F"
+			  "\x67\x69\x65\x73\x01\x74\x68\x61"
+			  "\x74\x77\x69\x6C\x6C\x01\x64\x65"
+			  "\x66\x69\x6E\x65\x74\x6F\x6D\x6F"
+			  "\x72\x72\x6F\x77\x01\x02\x02\x01",
+		.rlen	= 72,
+		.assoc	= "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+			  "\xDD\x0D\xB9\x9B",
+		.alen	= 12,
+		.input	= "\xD4\xB7\xED\x86\xA1\x77\x7F\x2E"
+			  "\xA1\x3D\x69\x73\xD3\x24\xC6\x9E"
+			  "\x7B\x43\xF8\x26\xFB\x56\x83\x12"
+			  "\x26\x50\x8B\xEB\xD2\xDC\xEB\x18"
+			  "\xD0\xA6\xDF\x10\xE5\x48\x7D\xF0"
+			  "\x74\x11\x3E\x14\xC6\x41\x02\x4E"
+			  "\x3E\x67\x73\xD9\x1A\x62\xEE\x42"
+			  "\x9B\x04\x3A\x10\xE3\xEF\xE6\xB0"
+			  "\x12\xA4\x93\x63\x41\x23\x64\xF8"
+			  "\xC0\xCA\xC5\x87\xF2\x49\xE5\x6B"
+			  "\x11\xE2\x4F\x30\xE4\x4C\xCC\x76",
+		.ilen	= 88,
+	}, {
+		.key	= "\x7D\x77\x3D\x00\xC1\x44\xC5\x25"
+			  "\xAC\x61\x9D\x18\xC8\x4A\x3F\x47"
+			  "\xD9\x66\x42\x67",
+		.klen	= 20,
+		.iv	= "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+		.result	= "\x01\x02\x02\x01",
+		.rlen	= 4,
+		.assoc	= "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF",
+		.alen	= 8,
+		.input	= "\x43\x7F\x86\x6B\xCB\x3F\x69\x9F"
+			  "\xE9\xB0\x82\x2B\xAC\x96\x1C\x45"
+			  "\x04\xBE\xF2\x70",
+		.ilen	= 20,
+	}, {
+		.key	= "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+			  "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+			  "\xDE\xCA\xF8\x88",
+		.klen	= 20,
+		.iv	= "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+		.result	= "\x74\x6F\x01\x62\x65\x01\x6F\x72"
+			  "\x01\x6E\x6F\x74\x01\x74\x6F\x01"
+			  "\x62\x65\x00\x01",
+		.rlen	= 20,
+		.assoc	= "\x00\x00\x01\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x01",
+		.alen	= 12,
+		.input	= "\x29\xC9\xFC\x69\xA1\x97\xD0\x38"
+			  "\xCC\xDD\x14\xE2\xDD\xFC\xAA\x05"
+			  "\x43\x33\x21\x64\x41\x25\x03\x52"
+			  "\x43\x03\xED\x3C\x6C\x5F\x28\x38"
+			  "\x43\xAF\x8C\x3E",
+		.ilen	= 36,
+	}, {
+		.key	= "\x6C\x65\x67\x61\x6C\x69\x7A\x65"
+			  "\x6D\x61\x72\x69\x6A\x75\x61\x6E"
+			  "\x61\x61\x6E\x64\x64\x6F\x69\x74"
+			  "\x62\x65\x66\x6F\x72\x65\x69\x61"
+			  "\x74\x75\x72\x6E",
+		.klen	= 36,
+		.iv	= "\x33\x30\x21\x69\x67\x65\x74\x6D",
+		.result	= "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+			  "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+			  "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+			  "\x02\x00\x07\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x01\x02\x02\x01",
+		.rlen	= 52,
+		.assoc	= "\x79\x6B\x69\x63\xFF\xFF\xFF\xFF"
+			  "\xFF\xFF\xFF\xFF",
+		.alen	= 12,
+		.input	= "\xF9\x7A\xB2\xAA\x35\x6D\x8E\xDC"
+			  "\xE1\x76\x44\xAC\x8C\x78\xE2\x5D"
+			  "\xD2\x4D\xED\xBB\x29\xEB\xF1\xB6"
+			  "\x4A\x27\x4B\x39\xB4\x9C\x3A\x86"
+			  "\x4C\xD3\xD7\x8C\xA4\xAE\x68\xA3"
+			  "\x2B\x42\x45\x8F\xB5\x7D\xBE\x82"
+			  "\x1D\xCC\x63\xB9\xD0\x93\x7B\xA2"
+			  "\x94\x5F\x66\x93\x68\x66\x1A\x32"
+			  "\x9F\xB4\xC0\x53",
+		.ilen	= 68,
+	}, {
+		.key	= "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+			  "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+			  "\x57\x69\x0E\x43",
+		.klen	= 20,
+		.iv	= "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+		.result	= "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+			  "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+			  "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+			  "\x02\x00\x07\x00\x61\x62\x63\x64"
+			  "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+			  "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+			  "\x01\x02\x02\x01",
+		.rlen	= 52,
+		.assoc	= "\x3F\x7E\xF6\x42\x10\x10\x10\x10"
+			  "\x10\x10\x10\x10",
+		.alen	= 12,
+		.input	= "\xFB\xA2\xCA\xA8\xC6\xC5\xF9\xF0"
+			  "\xF2\x2C\xA5\x4A\x06\x12\x10\xAD"
+			  "\x3F\x6E\x57\x91\xCF\x1A\xCA\x21"
+			  "\x0D\x11\x7C\xEC\x9C\x35\x79\x17"
+			  "\x65\xAC\xBD\x87\x01\xAD\x79\x84"
+			  "\x5B\xF9\xFE\x3F\xBA\x48\x7B\xC9"
+			  "\x63\x21\x93\x06\x84\xEE\xCA\xDB"
+			  "\x56\x91\x25\x46\xE7\xA9\x5C\x97"
+			  "\x40\xD7\xCB\x05",
+		.ilen	= 68,
+	}, {
+		.key	= "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+			  "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+			  "\x22\x43\x3C\x64",
+		.klen	= 20,
+		.iv	= "\x48\x55\xEC\x7D\x3A\x23\x4B\xFD",
+		.result	= "\x08\x00\xC6\xCD\x02\x00\x07\x00"
+			  "\x61\x62\x63\x64\x65\x66\x67\x68"
+			  "\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70"
+			  "\x71\x72\x73\x74\x01\x02\x02\x01",
+		.rlen	= 32,
+		.assoc	= "\x00\x00\x43\x21\x87\x65\x43\x21"
+			  "\x00\x00\x00\x07",
+		.alen	= 12,
+		.input	= "\x74\x75\x2E\x8A\xEB\x5D\x87\x3C"
+			  "\xD7\xC0\xF4\xAC\xC3\x6C\x4B\xFF"
+			  "\x84\xB7\xD7\xB9\x8F\x0C\xA8\xB6"
+			  "\xAC\xDA\x68\x94\xBC\x61\x90\x69"
+			  "\xEF\x9C\xBC\x28\xFE\x1B\x56\xA7"
+			  "\xC4\xE0\xD5\x8C\x86\xCD\x2B\xC0",
+		.ilen	= 48,
 	}
 };
 
@@ -19975,8 +21328,9 @@ static struct aead_testvec aes_gcm_rfc4543_enc_tv_template[] = {
 			  "\x22\x43\x3c\x64",
 		.klen	= 20,
 		.iv	= zeroed_string,
-		.assoc	= "\x00\x00\x43\x21\x00\x00\x00\x07",
-		.alen	= 8,
+		.assoc	= "\x00\x00\x43\x21\x00\x00\x00\x07"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.alen	= 16,
 		.input	= "\x45\x00\x00\x30\xda\x3a\x00\x00"
 			  "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
 			  "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
@@ -20005,8 +21359,9 @@ static struct aead_testvec aes_gcm_rfc4543_dec_tv_template[] = {
 			  "\x22\x43\x3c\x64",
 		.klen	= 20,
 		.iv	= zeroed_string,
-		.assoc	= "\x00\x00\x43\x21\x00\x00\x00\x07",
-		.alen	= 8,
+		.assoc	= "\x00\x00\x43\x21\x00\x00\x00\x07"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.alen	= 16,
 		.input	= "\x45\x00\x00\x30\xda\x3a\x00\x00"
 			  "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
 			  "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
@@ -20031,8 +21386,9 @@ static struct aead_testvec aes_gcm_rfc4543_dec_tv_template[] = {
 			  "\x22\x43\x3c\x64",
 		.klen	= 20,
 		.iv	= zeroed_string,
-		.assoc	= "\x00\x00\x43\x21\x00\x00\x00\x07",
-		.alen	= 8,
+		.assoc	= "\x00\x00\x43\x21\x00\x00\x00\x07"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.alen	= 16,
 		.input	= "\x45\x00\x00\x30\xda\x3a\x00\x00"
 			  "\x80\x01\xdf\x3b\xc0\xa8\x00\x05"
 			  "\xc0\xa8\x00\x01\x08\x00\xc6\xcd"
@@ -20703,6 +22059,454 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
 	},
 };
 
+/*
+ * ChaCha20-Poly1305 AEAD test vectors from RFC7539, sections 2.8.2 and A.5.
+ */
+#define RFC7539_ENC_TEST_VECTORS 2
+#define RFC7539_DEC_TEST_VECTORS 2
+static struct aead_testvec rfc7539_enc_tv_template[] = {
+	{
+		.key	= "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f",
+		.klen	= 32,
+		.iv	= "\x07\x00\x00\x00\x40\x41\x42\x43"
+			  "\x44\x45\x46\x47",
+		.assoc	= "\x50\x51\x52\x53\xc0\xc1\xc2\xc3"
+			  "\xc4\xc5\xc6\xc7",
+		.alen	= 12,
+		.input	= "\x4c\x61\x64\x69\x65\x73\x20\x61"
+			  "\x6e\x64\x20\x47\x65\x6e\x74\x6c"
+			  "\x65\x6d\x65\x6e\x20\x6f\x66\x20"
+			  "\x74\x68\x65\x20\x63\x6c\x61\x73"
+			  "\x73\x20\x6f\x66\x20\x27\x39\x39"
+			  "\x3a\x20\x49\x66\x20\x49\x20\x63"
+			  "\x6f\x75\x6c\x64\x20\x6f\x66\x66"
+			  "\x65\x72\x20\x79\x6f\x75\x20\x6f"
+			  "\x6e\x6c\x79\x20\x6f\x6e\x65\x20"
+			  "\x74\x69\x70\x20\x66\x6f\x72\x20"
+			  "\x74\x68\x65\x20\x66\x75\x74\x75"
+			  "\x72\x65\x2c\x20\x73\x75\x6e\x73"
+			  "\x63\x72\x65\x65\x6e\x20\x77\x6f"
+			  "\x75\x6c\x64\x20\x62\x65\x20\x69"
+			  "\x74\x2e",
+		.ilen	= 114,
+		.result	= "\xd3\x1a\x8d\x34\x64\x8e\x60\xdb"
+			  "\x7b\x86\xaf\xbc\x53\xef\x7e\xc2"
+			  "\xa4\xad\xed\x51\x29\x6e\x08\xfe"
+			  "\xa9\xe2\xb5\xa7\x36\xee\x62\xd6"
+			  "\x3d\xbe\xa4\x5e\x8c\xa9\x67\x12"
+			  "\x82\xfa\xfb\x69\xda\x92\x72\x8b"
+			  "\x1a\x71\xde\x0a\x9e\x06\x0b\x29"
+			  "\x05\xd6\xa5\xb6\x7e\xcd\x3b\x36"
+			  "\x92\xdd\xbd\x7f\x2d\x77\x8b\x8c"
+			  "\x98\x03\xae\xe3\x28\x09\x1b\x58"
+			  "\xfa\xb3\x24\xe4\xfa\xd6\x75\x94"
+			  "\x55\x85\x80\x8b\x48\x31\xd7\xbc"
+			  "\x3f\xf4\xde\xf0\x8e\x4b\x7a\x9d"
+			  "\xe5\x76\xd2\x65\x86\xce\xc6\x4b"
+			  "\x61\x16\x1a\xe1\x0b\x59\x4f\x09"
+			  "\xe2\x6a\x7e\x90\x2e\xcb\xd0\x60"
+			  "\x06\x91",
+		.rlen	= 130,
+	}, {
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x01\x02\x03\x04"
+			  "\x05\x06\x07\x08",
+		.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00"
+			  "\x00\x00\x4e\x91",
+		.alen	= 12,
+		.input	= "\x49\x6e\x74\x65\x72\x6e\x65\x74"
+			  "\x2d\x44\x72\x61\x66\x74\x73\x20"
+			  "\x61\x72\x65\x20\x64\x72\x61\x66"
+			  "\x74\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x76\x61\x6c\x69"
+			  "\x64\x20\x66\x6f\x72\x20\x61\x20"
+			  "\x6d\x61\x78\x69\x6d\x75\x6d\x20"
+			  "\x6f\x66\x20\x73\x69\x78\x20\x6d"
+			  "\x6f\x6e\x74\x68\x73\x20\x61\x6e"
+			  "\x64\x20\x6d\x61\x79\x20\x62\x65"
+			  "\x20\x75\x70\x64\x61\x74\x65\x64"
+			  "\x2c\x20\x72\x65\x70\x6c\x61\x63"
+			  "\x65\x64\x2c\x20\x6f\x72\x20\x6f"
+			  "\x62\x73\x6f\x6c\x65\x74\x65\x64"
+			  "\x20\x62\x79\x20\x6f\x74\x68\x65"
+			  "\x72\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x61\x74\x20\x61"
+			  "\x6e\x79\x20\x74\x69\x6d\x65\x2e"
+			  "\x20\x49\x74\x20\x69\x73\x20\x69"
+			  "\x6e\x61\x70\x70\x72\x6f\x70\x72"
+			  "\x69\x61\x74\x65\x20\x74\x6f\x20"
+			  "\x75\x73\x65\x20\x49\x6e\x74\x65"
+			  "\x72\x6e\x65\x74\x2d\x44\x72\x61"
+			  "\x66\x74\x73\x20\x61\x73\x20\x72"
+			  "\x65\x66\x65\x72\x65\x6e\x63\x65"
+			  "\x20\x6d\x61\x74\x65\x72\x69\x61"
+			  "\x6c\x20\x6f\x72\x20\x74\x6f\x20"
+			  "\x63\x69\x74\x65\x20\x74\x68\x65"
+			  "\x6d\x20\x6f\x74\x68\x65\x72\x20"
+			  "\x74\x68\x61\x6e\x20\x61\x73\x20"
+			  "\x2f\xe2\x80\x9c\x77\x6f\x72\x6b"
+			  "\x20\x69\x6e\x20\x70\x72\x6f\x67"
+			  "\x72\x65\x73\x73\x2e\x2f\xe2\x80"
+			  "\x9d",
+		.ilen	= 265,
+		.result	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4"
+			  "\x60\xf0\x62\xc7\x9b\xe6\x43\xbd"
+			  "\x5e\x80\x5c\xfd\x34\x5c\xf3\x89"
+			  "\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2"
+			  "\x4c\x6c\xfc\x18\x75\x5d\x43\xee"
+			  "\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0"
+			  "\xbd\xb7\xb7\x3c\x32\x1b\x01\x00"
+			  "\xd4\xf0\x3b\x7f\x35\x58\x94\xcf"
+			  "\x33\x2f\x83\x0e\x71\x0b\x97\xce"
+			  "\x98\xc8\xa8\x4a\xbd\x0b\x94\x81"
+			  "\x14\xad\x17\x6e\x00\x8d\x33\xbd"
+			  "\x60\xf9\x82\xb1\xff\x37\xc8\x55"
+			  "\x97\x97\xa0\x6e\xf4\xf0\xef\x61"
+			  "\xc1\x86\x32\x4e\x2b\x35\x06\x38"
+			  "\x36\x06\x90\x7b\x6a\x7c\x02\xb0"
+			  "\xf9\xf6\x15\x7b\x53\xc8\x67\xe4"
+			  "\xb9\x16\x6c\x76\x7b\x80\x4d\x46"
+			  "\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9"
+			  "\x90\x40\xc5\xa4\x04\x33\x22\x5e"
+			  "\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e"
+			  "\xaf\x45\x34\xd7\xf8\x3f\xa1\x15"
+			  "\x5b\x00\x47\x71\x8c\xbc\x54\x6a"
+			  "\x0d\x07\x2b\x04\xb3\x56\x4e\xea"
+			  "\x1b\x42\x22\x73\xf5\x48\x27\x1a"
+			  "\x0b\xb2\x31\x60\x53\xfa\x76\x99"
+			  "\x19\x55\xeb\xd6\x31\x59\x43\x4e"
+			  "\xce\xbb\x4e\x46\x6d\xae\x5a\x10"
+			  "\x73\xa6\x72\x76\x27\x09\x7a\x10"
+			  "\x49\xe6\x17\xd9\x1d\x36\x10\x94"
+			  "\xfa\x68\xf0\xff\x77\x98\x71\x30"
+			  "\x30\x5b\xea\xba\x2e\xda\x04\xdf"
+			  "\x99\x7b\x71\x4d\x6c\x6f\x2c\x29"
+			  "\xa6\xad\x5c\xb4\x02\x2b\x02\x70"
+			  "\x9b\xee\xad\x9d\x67\x89\x0c\xbb"
+			  "\x22\x39\x23\x36\xfe\xa1\x85\x1f"
+			  "\x38",
+		.rlen	= 281,
+	},
+};
+
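The vectors above (and the decryption set that follows) target the rfc7539(chacha20,poly1305) template. A sketch of driving one encryption vector through the new AEAD API from this update, where AD and text share a single scatterlist; the in-place buffer layout (assoc immediately followed by plaintext, with room for the 16-byte tag) and the helper name are our assumptions:

	#include <crypto/aead.h>
	#include <linux/err.h>
	#include <linux/errno.h>
	#include <linux/scatterlist.h>

	/* Sketch: encrypt one rfc7539 vector in place.  buf holds
	 * assoc || plaintext and has alen + ilen + 16 bytes of room. */
	static int rfc7539_encrypt_tv(u8 *buf, unsigned int alen,
				      unsigned int ilen,
				      const u8 key[32], u8 iv[12])
	{
		struct crypto_aead *tfm;
		struct aead_request *req;
		struct scatterlist sg;
		int err;

		/* Ask for a synchronous implementation to keep things simple. */
		tfm = crypto_alloc_aead("rfc7539(chacha20,poly1305)", 0,
					CRYPTO_ALG_ASYNC);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_aead_setkey(tfm, key, 32) ?:
		      crypto_aead_setauthsize(tfm, 16);
		if (err)
			goto out;

		req = aead_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out;
		}

		/* New interface: one SG list covers AD, text and tag room. */
		sg_init_one(&sg, buf, alen + ilen + 16);
		aead_request_set_callback(req, 0, NULL, NULL);
		aead_request_set_ad(req, alen);
		aead_request_set_crypt(req, &sg, &sg, ilen, iv);

		err = crypto_aead_encrypt(req);
		aead_request_free(req);
	out:
		crypto_free_aead(tfm);
		return err;
	}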
+static struct aead_testvec rfc7539_dec_tv_template[] = {
+	{
+		.key	= "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f",
+		.klen	= 32,
+		.iv	= "\x07\x00\x00\x00\x40\x41\x42\x43"
+			  "\x44\x45\x46\x47",
+		.assoc	= "\x50\x51\x52\x53\xc0\xc1\xc2\xc3"
+			  "\xc4\xc5\xc6\xc7",
+		.alen	= 12,
+		.input	= "\xd3\x1a\x8d\x34\x64\x8e\x60\xdb"
+			  "\x7b\x86\xaf\xbc\x53\xef\x7e\xc2"
+			  "\xa4\xad\xed\x51\x29\x6e\x08\xfe"
+			  "\xa9\xe2\xb5\xa7\x36\xee\x62\xd6"
+			  "\x3d\xbe\xa4\x5e\x8c\xa9\x67\x12"
+			  "\x82\xfa\xfb\x69\xda\x92\x72\x8b"
+			  "\x1a\x71\xde\x0a\x9e\x06\x0b\x29"
+			  "\x05\xd6\xa5\xb6\x7e\xcd\x3b\x36"
+			  "\x92\xdd\xbd\x7f\x2d\x77\x8b\x8c"
+			  "\x98\x03\xae\xe3\x28\x09\x1b\x58"
+			  "\xfa\xb3\x24\xe4\xfa\xd6\x75\x94"
+			  "\x55\x85\x80\x8b\x48\x31\xd7\xbc"
+			  "\x3f\xf4\xde\xf0\x8e\x4b\x7a\x9d"
+			  "\xe5\x76\xd2\x65\x86\xce\xc6\x4b"
+			  "\x61\x16\x1a\xe1\x0b\x59\x4f\x09"
+			  "\xe2\x6a\x7e\x90\x2e\xcb\xd0\x60"
+			  "\x06\x91",
+		.ilen	= 130,
+		.result	= "\x4c\x61\x64\x69\x65\x73\x20\x61"
+			  "\x6e\x64\x20\x47\x65\x6e\x74\x6c"
+			  "\x65\x6d\x65\x6e\x20\x6f\x66\x20"
+			  "\x74\x68\x65\x20\x63\x6c\x61\x73"
+			  "\x73\x20\x6f\x66\x20\x27\x39\x39"
+			  "\x3a\x20\x49\x66\x20\x49\x20\x63"
+			  "\x6f\x75\x6c\x64\x20\x6f\x66\x66"
+			  "\x65\x72\x20\x79\x6f\x75\x20\x6f"
+			  "\x6e\x6c\x79\x20\x6f\x6e\x65\x20"
+			  "\x74\x69\x70\x20\x66\x6f\x72\x20"
+			  "\x74\x68\x65\x20\x66\x75\x74\x75"
+			  "\x72\x65\x2c\x20\x73\x75\x6e\x73"
+			  "\x63\x72\x65\x65\x6e\x20\x77\x6f"
+			  "\x75\x6c\x64\x20\x62\x65\x20\x69"
+			  "\x74\x2e",
+		.rlen	= 114,
+	}, {
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x01\x02\x03\x04"
+			  "\x05\x06\x07\x08",
+		.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00"
+			  "\x00\x00\x4e\x91",
+		.alen	= 12,
+		.input	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4"
+			  "\x60\xf0\x62\xc7\x9b\xe6\x43\xbd"
+			  "\x5e\x80\x5c\xfd\x34\x5c\xf3\x89"
+			  "\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2"
+			  "\x4c\x6c\xfc\x18\x75\x5d\x43\xee"
+			  "\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0"
+			  "\xbd\xb7\xb7\x3c\x32\x1b\x01\x00"
+			  "\xd4\xf0\x3b\x7f\x35\x58\x94\xcf"
+			  "\x33\x2f\x83\x0e\x71\x0b\x97\xce"
+			  "\x98\xc8\xa8\x4a\xbd\x0b\x94\x81"
+			  "\x14\xad\x17\x6e\x00\x8d\x33\xbd"
+			  "\x60\xf9\x82\xb1\xff\x37\xc8\x55"
+			  "\x97\x97\xa0\x6e\xf4\xf0\xef\x61"
+			  "\xc1\x86\x32\x4e\x2b\x35\x06\x38"
+			  "\x36\x06\x90\x7b\x6a\x7c\x02\xb0"
+			  "\xf9\xf6\x15\x7b\x53\xc8\x67\xe4"
+			  "\xb9\x16\x6c\x76\x7b\x80\x4d\x46"
+			  "\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9"
+			  "\x90\x40\xc5\xa4\x04\x33\x22\x5e"
+			  "\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e"
+			  "\xaf\x45\x34\xd7\xf8\x3f\xa1\x15"
+			  "\x5b\x00\x47\x71\x8c\xbc\x54\x6a"
+			  "\x0d\x07\x2b\x04\xb3\x56\x4e\xea"
+			  "\x1b\x42\x22\x73\xf5\x48\x27\x1a"
+			  "\x0b\xb2\x31\x60\x53\xfa\x76\x99"
+			  "\x19\x55\xeb\xd6\x31\x59\x43\x4e"
+			  "\xce\xbb\x4e\x46\x6d\xae\x5a\x10"
+			  "\x73\xa6\x72\x76\x27\x09\x7a\x10"
+			  "\x49\xe6\x17\xd9\x1d\x36\x10\x94"
+			  "\xfa\x68\xf0\xff\x77\x98\x71\x30"
+			  "\x30\x5b\xea\xba\x2e\xda\x04\xdf"
+			  "\x99\x7b\x71\x4d\x6c\x6f\x2c\x29"
+			  "\xa6\xad\x5c\xb4\x02\x2b\x02\x70"
+			  "\x9b\xee\xad\x9d\x67\x89\x0c\xbb"
+			  "\x22\x39\x23\x36\xfe\xa1\x85\x1f"
+			  "\x38",
+		.ilen	= 281,
+		.result	= "\x49\x6e\x74\x65\x72\x6e\x65\x74"
+			  "\x2d\x44\x72\x61\x66\x74\x73\x20"
+			  "\x61\x72\x65\x20\x64\x72\x61\x66"
+			  "\x74\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x76\x61\x6c\x69"
+			  "\x64\x20\x66\x6f\x72\x20\x61\x20"
+			  "\x6d\x61\x78\x69\x6d\x75\x6d\x20"
+			  "\x6f\x66\x20\x73\x69\x78\x20\x6d"
+			  "\x6f\x6e\x74\x68\x73\x20\x61\x6e"
+			  "\x64\x20\x6d\x61\x79\x20\x62\x65"
+			  "\x20\x75\x70\x64\x61\x74\x65\x64"
+			  "\x2c\x20\x72\x65\x70\x6c\x61\x63"
+			  "\x65\x64\x2c\x20\x6f\x72\x20\x6f"
+			  "\x62\x73\x6f\x6c\x65\x74\x65\x64"
+			  "\x20\x62\x79\x20\x6f\x74\x68\x65"
+			  "\x72\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x61\x74\x20\x61"
+			  "\x6e\x79\x20\x74\x69\x6d\x65\x2e"
+			  "\x20\x49\x74\x20\x69\x73\x20\x69"
+			  "\x6e\x61\x70\x70\x72\x6f\x70\x72"
+			  "\x69\x61\x74\x65\x20\x74\x6f\x20"
+			  "\x75\x73\x65\x20\x49\x6e\x74\x65"
+			  "\x72\x6e\x65\x74\x2d\x44\x72\x61"
+			  "\x66\x74\x73\x20\x61\x73\x20\x72"
+			  "\x65\x66\x65\x72\x65\x6e\x63\x65"
+			  "\x20\x6d\x61\x74\x65\x72\x69\x61"
+			  "\x6c\x20\x6f\x72\x20\x74\x6f\x20"
+			  "\x63\x69\x74\x65\x20\x74\x68\x65"
+			  "\x6d\x20\x6f\x74\x68\x65\x72\x20"
+			  "\x74\x68\x61\x6e\x20\x61\x73\x20"
+			  "\x2f\xe2\x80\x9c\x77\x6f\x72\x6b"
+			  "\x20\x69\x6e\x20\x70\x72\x6f\x67"
+			  "\x72\x65\x73\x73\x2e\x2f\xe2\x80"
+			  "\x9d",
+		.rlen	= 265,
+	},
+};
+
+/*
+ * ChaCha20-Poly1305 IPsec ESP test vectors from
+ * draft-irtf-cfrg-chacha20-poly1305: the same RFC7539 payload, recast
+ * with the 32-bit salt carried in the key (hence klen = 36) and a
+ * 64-bit per-packet IV.
+ */
+#define RFC7539ESP_DEC_TEST_VECTORS 1
+#define RFC7539ESP_ENC_TEST_VECTORS 1
+static struct aead_testvec rfc7539esp_enc_tv_template[] = {
+	{
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0"
+			  "\x00\x00\x00\x00",
+		.klen	= 36,
+		.iv	= "\x01\x02\x03\x04\x05\x06\x07\x08",
+		.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00"
+			  "\x00\x00\x4e\x91",
+		.alen	= 12,
+		.input	= "\x49\x6e\x74\x65\x72\x6e\x65\x74"
+			  "\x2d\x44\x72\x61\x66\x74\x73\x20"
+			  "\x61\x72\x65\x20\x64\x72\x61\x66"
+			  "\x74\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x76\x61\x6c\x69"
+			  "\x64\x20\x66\x6f\x72\x20\x61\x20"
+			  "\x6d\x61\x78\x69\x6d\x75\x6d\x20"
+			  "\x6f\x66\x20\x73\x69\x78\x20\x6d"
+			  "\x6f\x6e\x74\x68\x73\x20\x61\x6e"
+			  "\x64\x20\x6d\x61\x79\x20\x62\x65"
+			  "\x20\x75\x70\x64\x61\x74\x65\x64"
+			  "\x2c\x20\x72\x65\x70\x6c\x61\x63"
+			  "\x65\x64\x2c\x20\x6f\x72\x20\x6f"
+			  "\x62\x73\x6f\x6c\x65\x74\x65\x64"
+			  "\x20\x62\x79\x20\x6f\x74\x68\x65"
+			  "\x72\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x61\x74\x20\x61"
+			  "\x6e\x79\x20\x74\x69\x6d\x65\x2e"
+			  "\x20\x49\x74\x20\x69\x73\x20\x69"
+			  "\x6e\x61\x70\x70\x72\x6f\x70\x72"
+			  "\x69\x61\x74\x65\x20\x74\x6f\x20"
+			  "\x75\x73\x65\x20\x49\x6e\x74\x65"
+			  "\x72\x6e\x65\x74\x2d\x44\x72\x61"
+			  "\x66\x74\x73\x20\x61\x73\x20\x72"
+			  "\x65\x66\x65\x72\x65\x6e\x63\x65"
+			  "\x20\x6d\x61\x74\x65\x72\x69\x61"
+			  "\x6c\x20\x6f\x72\x20\x74\x6f\x20"
+			  "\x63\x69\x74\x65\x20\x74\x68\x65"
+			  "\x6d\x20\x6f\x74\x68\x65\x72\x20"
+			  "\x74\x68\x61\x6e\x20\x61\x73\x20"
+			  "\x2f\xe2\x80\x9c\x77\x6f\x72\x6b"
+			  "\x20\x69\x6e\x20\x70\x72\x6f\x67"
+			  "\x72\x65\x73\x73\x2e\x2f\xe2\x80"
+			  "\x9d",
+		.ilen	= 265,
+		.result	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4"
+			  "\x60\xf0\x62\xc7\x9b\xe6\x43\xbd"
+			  "\x5e\x80\x5c\xfd\x34\x5c\xf3\x89"
+			  "\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2"
+			  "\x4c\x6c\xfc\x18\x75\x5d\x43\xee"
+			  "\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0"
+			  "\xbd\xb7\xb7\x3c\x32\x1b\x01\x00"
+			  "\xd4\xf0\x3b\x7f\x35\x58\x94\xcf"
+			  "\x33\x2f\x83\x0e\x71\x0b\x97\xce"
+			  "\x98\xc8\xa8\x4a\xbd\x0b\x94\x81"
+			  "\x14\xad\x17\x6e\x00\x8d\x33\xbd"
+			  "\x60\xf9\x82\xb1\xff\x37\xc8\x55"
+			  "\x97\x97\xa0\x6e\xf4\xf0\xef\x61"
+			  "\xc1\x86\x32\x4e\x2b\x35\x06\x38"
+			  "\x36\x06\x90\x7b\x6a\x7c\x02\xb0"
+			  "\xf9\xf6\x15\x7b\x53\xc8\x67\xe4"
+			  "\xb9\x16\x6c\x76\x7b\x80\x4d\x46"
+			  "\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9"
+			  "\x90\x40\xc5\xa4\x04\x33\x22\x5e"
+			  "\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e"
+			  "\xaf\x45\x34\xd7\xf8\x3f\xa1\x15"
+			  "\x5b\x00\x47\x71\x8c\xbc\x54\x6a"
+			  "\x0d\x07\x2b\x04\xb3\x56\x4e\xea"
+			  "\x1b\x42\x22\x73\xf5\x48\x27\x1a"
+			  "\x0b\xb2\x31\x60\x53\xfa\x76\x99"
+			  "\x19\x55\xeb\xd6\x31\x59\x43\x4e"
+			  "\xce\xbb\x4e\x46\x6d\xae\x5a\x10"
+			  "\x73\xa6\x72\x76\x27\x09\x7a\x10"
+			  "\x49\xe6\x17\xd9\x1d\x36\x10\x94"
+			  "\xfa\x68\xf0\xff\x77\x98\x71\x30"
+			  "\x30\x5b\xea\xba\x2e\xda\x04\xdf"
+			  "\x99\x7b\x71\x4d\x6c\x6f\x2c\x29"
+			  "\xa6\xad\x5c\xb4\x02\x2b\x02\x70"
+			  "\x9b\xee\xad\x9d\x67\x89\x0c\xbb"
+			  "\x22\x39\x23\x36\xfe\xa1\x85\x1f"
+			  "\x38",
+		.rlen	= 281,
+	},
+};
+
+static struct aead_testvec rfc7539esp_dec_tv_template[] = {
+	{
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0"
+			  "\x00\x00\x00\x00",
+		.klen	= 36,
+		.iv	= "\x01\x02\x03\x04\x05\x06\x07\x08",
+		.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00"
+			  "\x00\x00\x4e\x91",
+		.alen	= 12,
+		.input	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4"
+			  "\x60\xf0\x62\xc7\x9b\xe6\x43\xbd"
+			  "\x5e\x80\x5c\xfd\x34\x5c\xf3\x89"
+			  "\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2"
+			  "\x4c\x6c\xfc\x18\x75\x5d\x43\xee"
+			  "\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0"
+			  "\xbd\xb7\xb7\x3c\x32\x1b\x01\x00"
+			  "\xd4\xf0\x3b\x7f\x35\x58\x94\xcf"
+			  "\x33\x2f\x83\x0e\x71\x0b\x97\xce"
+			  "\x98\xc8\xa8\x4a\xbd\x0b\x94\x81"
+			  "\x14\xad\x17\x6e\x00\x8d\x33\xbd"
+			  "\x60\xf9\x82\xb1\xff\x37\xc8\x55"
+			  "\x97\x97\xa0\x6e\xf4\xf0\xef\x61"
+			  "\xc1\x86\x32\x4e\x2b\x35\x06\x38"
+			  "\x36\x06\x90\x7b\x6a\x7c\x02\xb0"
+			  "\xf9\xf6\x15\x7b\x53\xc8\x67\xe4"
+			  "\xb9\x16\x6c\x76\x7b\x80\x4d\x46"
+			  "\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9"
+			  "\x90\x40\xc5\xa4\x04\x33\x22\x5e"
+			  "\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e"
+			  "\xaf\x45\x34\xd7\xf8\x3f\xa1\x15"
+			  "\x5b\x00\x47\x71\x8c\xbc\x54\x6a"
+			  "\x0d\x07\x2b\x04\xb3\x56\x4e\xea"
+			  "\x1b\x42\x22\x73\xf5\x48\x27\x1a"
+			  "\x0b\xb2\x31\x60\x53\xfa\x76\x99"
+			  "\x19\x55\xeb\xd6\x31\x59\x43\x4e"
+			  "\xce\xbb\x4e\x46\x6d\xae\x5a\x10"
+			  "\x73\xa6\x72\x76\x27\x09\x7a\x10"
+			  "\x49\xe6\x17\xd9\x1d\x36\x10\x94"
+			  "\xfa\x68\xf0\xff\x77\x98\x71\x30"
+			  "\x30\x5b\xea\xba\x2e\xda\x04\xdf"
+			  "\x99\x7b\x71\x4d\x6c\x6f\x2c\x29"
+			  "\xa6\xad\x5c\xb4\x02\x2b\x02\x70"
+			  "\x9b\xee\xad\x9d\x67\x89\x0c\xbb"
+			  "\x22\x39\x23\x36\xfe\xa1\x85\x1f"
+			  "\x38",
+		.ilen	= 281,
+		.result	= "\x49\x6e\x74\x65\x72\x6e\x65\x74"
+			  "\x2d\x44\x72\x61\x66\x74\x73\x20"
+			  "\x61\x72\x65\x20\x64\x72\x61\x66"
+			  "\x74\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x76\x61\x6c\x69"
+			  "\x64\x20\x66\x6f\x72\x20\x61\x20"
+			  "\x6d\x61\x78\x69\x6d\x75\x6d\x20"
+			  "\x6f\x66\x20\x73\x69\x78\x20\x6d"
+			  "\x6f\x6e\x74\x68\x73\x20\x61\x6e"
+			  "\x64\x20\x6d\x61\x79\x20\x62\x65"
+			  "\x20\x75\x70\x64\x61\x74\x65\x64"
+			  "\x2c\x20\x72\x65\x70\x6c\x61\x63"
+			  "\x65\x64\x2c\x20\x6f\x72\x20\x6f"
+			  "\x62\x73\x6f\x6c\x65\x74\x65\x64"
+			  "\x20\x62\x79\x20\x6f\x74\x68\x65"
+			  "\x72\x20\x64\x6f\x63\x75\x6d\x65"
+			  "\x6e\x74\x73\x20\x61\x74\x20\x61"
+			  "\x6e\x79\x20\x74\x69\x6d\x65\x2e"
+			  "\x20\x49\x74\x20\x69\x73\x20\x69"
+			  "\x6e\x61\x70\x70\x72\x6f\x70\x72"
+			  "\x69\x61\x74\x65\x20\x74\x6f\x20"
+			  "\x75\x73\x65\x20\x49\x6e\x74\x65"
+			  "\x72\x6e\x65\x74\x2d\x44\x72\x61"
+			  "\x66\x74\x73\x20\x61\x73\x20\x72"
+			  "\x65\x66\x65\x72\x65\x6e\x63\x65"
+			  "\x20\x6d\x61\x74\x65\x72\x69\x61"
+			  "\x6c\x20\x6f\x72\x20\x74\x6f\x20"
+			  "\x63\x69\x74\x65\x20\x74\x68\x65"
+			  "\x6d\x20\x6f\x74\x68\x65\x72\x20"
+			  "\x74\x68\x61\x6e\x20\x61\x73\x20"
+			  "\x2f\xe2\x80\x9c\x77\x6f\x72\x6b"
+			  "\x20\x69\x6e\x20\x70\x72\x6f\x67"
+			  "\x72\x65\x73\x73\x2e\x2f\xe2\x80"
+			  "\x9d",
+		.rlen	= 265,
+	},
+};
+
 /*
  * ANSI X9.31 Continuous Pseudo-Random Number Generator (AES mode)
  * test vectors, taken from Appendix B.2.9 and B.2.10:
@@ -28370,6 +30174,183 @@ static struct cipher_testvec salsa20_stream_enc_tv_template[] = {
 	},
 };
 
+#define CHACHA20_ENC_TEST_VECTORS 3
+static struct cipher_testvec chacha20_enc_tv_template[] = {
+	{ /* RFC7539 A.2. Test Vector #1 */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ilen	= 64,
+		.result	= "\x76\xb8\xe0\xad\xa0\xf1\x3d\x90"
+			  "\x40\x5d\x6a\xe5\x53\x86\xbd\x28"
+			  "\xbd\xd2\x19\xb8\xa0\x8d\xed\x1a"
+			  "\xa8\x36\xef\xcc\x8b\x77\x0d\xc7"
+			  "\xda\x41\x59\x7c\x51\x57\x48\x8d"
+			  "\x77\x24\xe0\x3f\xb8\xd8\x4a\x37"
+			  "\x6a\x43\xb8\xf4\x15\x18\xa1\x1c"
+			  "\xc3\x87\xb6\x69\xb2\xee\x65\x86",
+		.rlen	= 64,
+	}, { /* RFC7539 A.2. Test Vector #2 */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x01",
+		.klen	= 32,
+		.iv     = "\x01\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x02",
+		.input	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+			  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+			  "\x6f\x20\x74\x68\x65\x20\x49\x45"
+			  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+			  "\x64\x65\x64\x20\x62\x79\x20\x74"
+			  "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+			  "\x69\x62\x75\x74\x6f\x72\x20\x66"
+			  "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+			  "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+			  "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+			  "\x20\x70\x61\x72\x74\x20\x6f\x66"
+			  "\x20\x61\x6e\x20\x49\x45\x54\x46"
+			  "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+			  "\x74\x2d\x44\x72\x61\x66\x74\x20"
+			  "\x6f\x72\x20\x52\x46\x43\x20\x61"
+			  "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+			  "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+			  "\x20\x6d\x61\x64\x65\x20\x77\x69"
+			  "\x74\x68\x69\x6e\x20\x74\x68\x65"
+			  "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+			  "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+			  "\x45\x54\x46\x20\x61\x63\x74\x69"
+			  "\x76\x69\x74\x79\x20\x69\x73\x20"
+			  "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+			  "\x65\x64\x20\x61\x6e\x20\x22\x49"
+			  "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+			  "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+			  "\x22\x2e\x20\x53\x75\x63\x68\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+			  "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+			  "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+			  "\x74\x73\x20\x69\x6e\x20\x49\x45"
+			  "\x54\x46\x20\x73\x65\x73\x73\x69"
+			  "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+			  "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+			  "\x77\x72\x69\x74\x74\x65\x6e\x20"
+			  "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+			  "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+			  "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+			  "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+			  "\x64\x65\x20\x61\x74\x20\x61\x6e"
+			  "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+			  "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+			  "\x20\x77\x68\x69\x63\x68\x20\x61"
+			  "\x72\x65\x20\x61\x64\x64\x72\x65"
+			  "\x73\x73\x65\x64\x20\x74\x6f",
+		.ilen	= 375,
+		.result	= "\xa3\xfb\xf0\x7d\xf3\xfa\x2f\xde"
+			  "\x4f\x37\x6c\xa2\x3e\x82\x73\x70"
+			  "\x41\x60\x5d\x9f\x4f\x4f\x57\xbd"
+			  "\x8c\xff\x2c\x1d\x4b\x79\x55\xec"
+			  "\x2a\x97\x94\x8b\xd3\x72\x29\x15"
+			  "\xc8\xf3\xd3\x37\xf7\xd3\x70\x05"
+			  "\x0e\x9e\x96\xd6\x47\xb7\xc3\x9f"
+			  "\x56\xe0\x31\xca\x5e\xb6\x25\x0d"
+			  "\x40\x42\xe0\x27\x85\xec\xec\xfa"
+			  "\x4b\x4b\xb5\xe8\xea\xd0\x44\x0e"
+			  "\x20\xb6\xe8\xdb\x09\xd8\x81\xa7"
+			  "\xc6\x13\x2f\x42\x0e\x52\x79\x50"
+			  "\x42\xbd\xfa\x77\x73\xd8\xa9\x05"
+			  "\x14\x47\xb3\x29\x1c\xe1\x41\x1c"
+			  "\x68\x04\x65\x55\x2a\xa6\xc4\x05"
+			  "\xb7\x76\x4d\x5e\x87\xbe\xa8\x5a"
+			  "\xd0\x0f\x84\x49\xed\x8f\x72\xd0"
+			  "\xd6\x62\xab\x05\x26\x91\xca\x66"
+			  "\x42\x4b\xc8\x6d\x2d\xf8\x0e\xa4"
+			  "\x1f\x43\xab\xf9\x37\xd3\x25\x9d"
+			  "\xc4\xb2\xd0\xdf\xb4\x8a\x6c\x91"
+			  "\x39\xdd\xd7\xf7\x69\x66\xe9\x28"
+			  "\xe6\x35\x55\x3b\xa7\x6c\x5c\x87"
+			  "\x9d\x7b\x35\xd4\x9e\xb2\xe6\x2b"
+			  "\x08\x71\xcd\xac\x63\x89\x39\xe2"
+			  "\x5e\x8a\x1e\x0e\xf9\xd5\x28\x0f"
+			  "\xa8\xca\x32\x8b\x35\x1c\x3c\x76"
+			  "\x59\x89\xcb\xcf\x3d\xaa\x8b\x6c"
+			  "\xcc\x3a\xaf\x9f\x39\x79\xc9\x2b"
+			  "\x37\x20\xfc\x88\xdc\x95\xed\x84"
+			  "\xa1\xbe\x05\x9c\x64\x99\xb9\xfd"
+			  "\xa2\x36\xe7\xe8\x18\xb0\x4b\x0b"
+			  "\xc3\x9c\x1e\x87\x6b\x19\x3b\xfe"
+			  "\x55\x69\x75\x3f\x88\x12\x8c\xc0"
+			  "\x8a\xaa\x9b\x63\xd1\xa1\x6f\x80"
+			  "\xef\x25\x54\xd7\x18\x9c\x41\x1f"
+			  "\x58\x69\xca\x52\xc5\xb8\x3f\xa3"
+			  "\x6f\xf2\x16\xb9\xc1\xd3\x00\x62"
+			  "\xbe\xbc\xfd\x2d\xc5\xbc\xe0\x91"
+			  "\x19\x34\xfd\xa7\x9a\x86\xf6\xe6"
+			  "\x98\xce\xd7\x59\xc3\xff\x9b\x64"
+			  "\x77\x33\x8f\x3d\xa4\xf9\xcd\x85"
+			  "\x14\xea\x99\x82\xcc\xaf\xb3\x41"
+			  "\xb2\x38\x4d\xd9\x02\xf3\xd1\xab"
+			  "\x7a\xc6\x1d\xd2\x9c\x6f\x21\xba"
+			  "\x5b\x86\x2f\x37\x30\xe3\x7c\xfd"
+			  "\xc4\xfd\x80\x6c\x22\xf2\x21",
+		.rlen	= 375,
+	}, { /* RFC7539 A.2. Test Vector #3 */
+		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+			  "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+			  "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+		.klen	= 32,
+		.iv     = "\x2a\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x02",
+		.input	= "\x27\x54\x77\x61\x73\x20\x62\x72"
+			  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+			  "\x6e\x64\x20\x74\x68\x65\x20\x73"
+			  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+			  "\x76\x65\x73\x0a\x44\x69\x64\x20"
+			  "\x67\x79\x72\x65\x20\x61\x6e\x64"
+			  "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+			  "\x69\x6e\x20\x74\x68\x65\x20\x77"
+			  "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+			  "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+			  "\x65\x72\x65\x20\x74\x68\x65\x20"
+			  "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+			  "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+			  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+			  "\x72\x61\x74\x68\x73\x20\x6f\x75"
+			  "\x74\x67\x72\x61\x62\x65\x2e",
+		.ilen	= 127,
+		.result	= "\x62\xe6\x34\x7f\x95\xed\x87\xa4"
+			  "\x5f\xfa\xe7\x42\x6f\x27\xa1\xdf"
+			  "\x5f\xb6\x91\x10\x04\x4c\x0d\x73"
+			  "\x11\x8e\xff\xa9\x5b\x01\xe5\xcf"
+			  "\x16\x6d\x3d\xf2\xd7\x21\xca\xf9"
+			  "\xb2\x1e\x5f\xb1\x4c\x61\x68\x71"
+			  "\xfd\x84\xc5\x4f\x9d\x65\xb2\x83"
+			  "\x19\x6c\x7f\xe4\xf6\x05\x53\xeb"
+			  "\xf3\x9c\x64\x02\xc4\x22\x34\xe3"
+			  "\x2a\x35\x6b\x3e\x76\x43\x12\xa6"
+			  "\x1a\x55\x32\x05\x57\x16\xea\xd6"
+			  "\x96\x25\x68\xf8\x7d\x3f\x3f\x77"
+			  "\x04\xc6\xa8\xd1\xbc\xd1\xbf\x4d"
+			  "\x50\xd6\x15\x4b\x6d\xa7\x31\xb1"
+			  "\x87\xb5\x8d\xfd\x72\x8a\xfa\x36"
+			  "\x75\x7a\x79\x7a\xc1\x88\xd1",
+		.rlen	= 127,
+	},
+};
+
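As a cross-check, the ChaCha20 vectors above follow directly from the RFC
7539 quarter round, the cipher's core operation. A minimal sketch (the
in-kernel implementation added by this series lives in
crypto/chacha20_generic.c):

	#include <stdint.h>

	#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

	/* One RFC 7539 quarter round over the 16-word ChaCha20 state. */
	static void chacha20_quarterround(uint32_t x[16], int a, int b,
					  int c, int d)
	{
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 16);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 12);
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 8);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 7);
	}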
 /*
  * CTS (Cipher Text Stealing) mode tests
  */
@@ -28591,7 +30572,7 @@ struct comp_testvec {
 };
 
 struct pcomp_testvec {
-	void *params;
+	const void *params;
 	unsigned int paramsize;
 	int inlen, outlen;
 	char input[COMP_BUF_SIZE];
@@ -28945,6 +30926,440 @@ static struct hash_testvec michael_mic_tv_template[] = {
 	}
 };
 
+/*
+ * CRC32 test vectors
+ */
+#define CRC32_TEST_VECTORS 14
+
+static struct hash_testvec crc32_tv_template[] = {
+	{
+		.key = "\x87\xa9\xcb\xed",
+		.ksize = 4,
+		.psize = 0,
+		.digest = "\x87\xa9\xcb\xed",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27\x28",
+		.psize = 40,
+		.digest = "\x3a\xdf\x4b\xb0",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50",
+		.psize = 40,
+		.digest = "\xa9\x7a\x7f\x7b",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78",
+		.psize = 40,
+		.digest = "\xba\xd3\xf8\x1c",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0",
+		.psize = 40,
+		.digest = "\xa8\xa9\xc2\x02",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8",
+		.psize = 40,
+		.digest = "\x27\xf0\x57\xe2",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 40,
+		.digest = "\x49\x78\x10\x08",
+	},
+	{
+		.key = "\x80\xea\xd3\xf1",
+		.ksize = 4,
+		.plaintext = "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50",
+		.psize = 40,
+		.digest = "\x9a\xb1\xdc\xf0",
+	},
+	{
+		.key = "\xf3\x4a\x1d\x5d",
+		.ksize = 4,
+		.plaintext = "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78",
+		.psize = 40,
+		.digest = "\xb4\x97\xcc\xd4",
+	},
+	{
+		.key = "\x2e\x80\x04\x59",
+		.ksize = 4,
+		.plaintext = "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0",
+		.psize = 40,
+		.digest = "\x67\x9b\xfa\x79",
+	},
+	{
+		.key = "\xa6\xcc\x19\x85",
+		.ksize = 4,
+		.plaintext = "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8",
+		.psize = 40,
+		.digest = "\x24\xb5\x16\xef",
+	},
+	{
+		.key = "\x41\xfc\xfe\x2d",
+		.ksize = 4,
+		.plaintext = "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 40,
+		.digest = "\x15\x94\x80\x39",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27\x28"
+			     "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50"
+			     "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78"
+			     "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0"
+			     "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8"
+			     "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 240,
+		.digest = "\x6c\xc6\x56\xde",
+		.np = 2,
+		.tap = { 31, 209 }
+	}, {
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext =	"\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
+				"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
+				"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
+				"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
+				"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
+				"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
+				"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
+				"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
+				"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
+				"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
+				"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
+				"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
+				"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
+				"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
+				"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
+				"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
+				"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
+				"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
+				"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
+				"\x58\xef\x63\xfa\x91\x05\x9c\x33"
+				"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
+				"\x19\xb0\x47\xde\x52\xe9\x80\x17"
+				"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
+				"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
+				"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
+				"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
+				"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
+				"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
+				"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
+				"\x86\x1d\x91\x28\xbf\x33\xca\x61"
+				"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
+				"\x47\xde\x75\x0c\x80\x17\xae\x22"
+				"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
+				"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
+				"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
+				"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
+				"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
+				"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
+				"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
+				"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
+				"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
+				"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
+				"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
+				"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
+				"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
+				"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
+				"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
+				"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
+				"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
+				"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
+				"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
+				"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
+				"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
+				"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
+				"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
+				"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
+				"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
+				"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
+				"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
+				"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
+				"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
+				"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
+				"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
+				"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
+				"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
+				"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
+				"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
+				"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
+				"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
+				"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
+				"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
+				"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
+				"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
+				"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
+				"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
+				"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
+				"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
+				"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
+				"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
+				"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
+				"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
+				"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
+				"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
+				"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
+				"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
+				"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
+				"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
+				"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
+				"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
+				"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
+				"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
+				"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
+				"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
+				"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
+				"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
+				"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
+				"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
+				"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
+				"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
+				"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
+				"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
+				"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
+				"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
+				"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
+				"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
+				"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
+				"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
+				"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
+				"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
+				"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
+				"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
+				"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
+				"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
+				"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
+				"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
+				"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
+				"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
+				"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
+				"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
+				"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
+				"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
+				"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
+				"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
+				"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
+				"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
+				"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
+				"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
+				"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
+				"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
+				"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
+				"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
+				"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
+				"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
+				"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
+				"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
+				"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
+				"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
+				"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
+				"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
+				"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
+				"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
+				"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
+				"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
+				"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
+				"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
+				"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
+				"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
+				"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
+				"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
+				"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
+				"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
+				"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
+				"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
+				"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
+				"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
+				"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
+				"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
+				"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
+				"\x47\xde\x52\xe9\x80\x17\x8b\x22"
+				"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
+				"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
+				"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
+				"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
+				"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
+				"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
+				"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
+				"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
+				"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
+				"\x75\x0c\x80\x17\xae\x22\xb9\x50"
+				"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
+				"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
+				"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
+				"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
+				"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
+				"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
+				"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
+				"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
+				"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
+				"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
+				"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
+				"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
+				"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
+				"\x48\xdf\x53\xea\x81\x18\x8c\x23"
+				"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
+				"\x09\xa0\x37\xce\x42\xd9\x70\x07"
+				"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
+				"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
+				"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
+				"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
+				"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
+				"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
+				"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
+				"\x76\x0d\x81\x18\xaf\x23\xba\x51"
+				"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
+				"\x37\xce\x65\xfc\x70\x07\x9e\x12"
+				"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
+				"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
+				"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
+				"\xff\x73\x0a\xa1\x15\xac\x43\xda"
+				"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
+				"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
+				"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
+				"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
+				"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
+				"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
+				"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
+				"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
+				"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
+				"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
+				"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
+				"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
+				"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
+				"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
+				"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
+				"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
+				"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
+				"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
+				"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
+				"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
+				"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
+				"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
+				"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
+				"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
+				"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
+				"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
+				"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
+				"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
+				"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
+				"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
+				"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
+				"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
+				"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
+				"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
+				"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
+				"\xef\x86\x1d\x91\x28\xbf\x33\xca"
+				"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
+				"\xd3\x47\xde\x75\x0c\x80\x17\xae"
+				"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
+				"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
+				"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
+				"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
+				"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
+				"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
+				"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
+				"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
+				"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
+				"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
+				"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
+				"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
+				"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
+				"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
+				"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
+				"\x67\xfe\x95\x09\xa0\x37\xce\x42"
+				"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
+				"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
+				"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
+		.psize = 2048,
+		.digest = "\xfb\x3a\x7a\xda",
+	}
+};
+
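A note on these vectors: .key is the 4-byte seed (the "key" of the crc32
shash) and .digest is the resulting CRC stored as bytes, which is why the
empty-message vector simply returns its seed. They should be reproducible
with a plain reflected CRC-32 -- a minimal sketch, assuming the crc32_le()
convention (polynomial 0xEDB88320, no final inversion):

	#include <stdint.h>
	#include <stddef.h>

	/* Bitwise reflected CRC-32, no final XOR, as crc32_le() computes. */
	static uint32_t crc32_le_sketch(uint32_t crc, const uint8_t *p,
					size_t len)
	{
		while (len--) {
			crc ^= *p++;
			for (int i = 0; i < 8; i++)
				crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1u));
		}
		return crc;
	}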
 /*
  * CRC32C test vectors
  */

crypto/zlib.c (+2, -2)

@@ -78,7 +78,7 @@ static void zlib_exit(struct crypto_tfm *tfm)
 }
 
 
-static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params,
+static int zlib_compress_setup(struct crypto_pcomp *tfm, const void *params,
 			       unsigned int len)
 {
 	struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
@@ -209,7 +209,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
 }
 
 
-static int zlib_decompress_setup(struct crypto_pcomp *tfm, void *params,
+static int zlib_decompress_setup(struct crypto_pcomp *tfm, const void *params,
 				 unsigned int len)
 {
 	struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
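The pcomp setup hooks only ever read the parameter blob, so both the zlib
hooks and the pcomp_testvec above are const-qualified; callers can now keep
parameter blobs in read-only memory. A minimal sketch of the effect
(assuming the crypto_compress_setup() wrapper is constified by the same
change; the blob contents here are placeholders -- zlib's real parameters
are an nlattr-encoded blob):

	/* With const-qualified setup hooks, the blob may live in rodata. */
	static const unsigned char comp_params[] = { 0 };

	err = crypto_compress_setup(tfm, comp_params, sizeof(comp_params));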

drivers/bus/mvebu-mbus.c (+117, -0)

@@ -57,6 +57,7 @@
 #include <linux/of_address.h>
 #include <linux/debugfs.h>
 #include <linux/log2.h>
+#include <linux/memblock.h>
 #include <linux/syscore_ops.h>
 
 /*
@@ -152,13 +153,39 @@ struct mvebu_mbus_state {
 
 static struct mvebu_mbus_state mbus_state;
 
+/*
+ * We provide two variants of the mv_mbus_dram_info() function:
+ *
+ * - The normal one, where the described DRAM ranges may overlap with
+ *   the I/O windows, but for which the DRAM ranges are guaranteed to
+ *   have a power of two size. Such ranges are suitable for the DMA
+ *   masters that only DMA between the RAM and the device, which is
+ *   actually all devices except the crypto engines.
+ *
+ * - The 'nooverlap' one, where the described DRAM ranges are
+ *   guaranteed to not overlap with the I/O windows, but for which the
+ *   DRAM ranges will not have power of two sizes. They will only be
+ *   aligned on a 64 KB boundary, and have a size multiple of 64
+ *   KB. Such ranges are suitable for the DMA masters that DMA between
+ *   the crypto SRAM (which is mapped through an I/O window) and a
+ *   device. This is the case for the crypto engines.
+ */
+
 static struct mbus_dram_target_info mvebu_mbus_dram_info;
+static struct mbus_dram_target_info mvebu_mbus_dram_info_nooverlap;
+
 const struct mbus_dram_target_info *mv_mbus_dram_info(void)
 {
 	return &mvebu_mbus_dram_info;
 }
 EXPORT_SYMBOL_GPL(mv_mbus_dram_info);
 
+const struct mbus_dram_target_info *mv_mbus_dram_info_nooverlap(void)
+{
+	return &mvebu_mbus_dram_info_nooverlap;
+}
+EXPORT_SYMBOL_GPL(mv_mbus_dram_info_nooverlap);
+
 /* Checks whether the given window has remap capability */
 static bool mvebu_mbus_window_is_remappable(struct mvebu_mbus_state *mbus,
 					    const int win)
@@ -576,6 +603,95 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
 		return MVEBU_MBUS_NO_REMAP;
 }
 
+/*
+ * Use the memblock information to find the MBus bridge hole in the
+ * physical address space.
+ */
+static void __init
+mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
+{
+	struct memblock_region *r;
+	uint64_t s = 0;
+
+	for_each_memblock(memory, r) {
+		/*
+		 * This part of the memory is above 4 GB, so we don't
+		 * care for the MBus bridge hole.
+		 */
+		if (r->base >= 0x100000000ULL)
+			continue;
+
+		/*
+		 * The MBus bridge hole is at the end of the RAM under
+		 * the 4 GB limit.
+		 */
+		if (r->base + r->size > s)
+			s = r->base + r->size;
+	}
+
+	*start = s;
+	*end = 0x100000000ULL;
+}
+
+/*
+ * This function fills in the mvebu_mbus_dram_info_nooverlap data
+ * structure, by looking at the mvebu_mbus_dram_info data, and
+ * removing the parts of it that overlap with I/O windows.
+ */
+static void __init
+mvebu_mbus_setup_cpu_target_nooverlap(struct mvebu_mbus_state *mbus)
+{
+	uint64_t mbus_bridge_base, mbus_bridge_end;
+	int cs_nooverlap = 0;
+	int i;
+
+	mvebu_mbus_find_bridge_hole(&mbus_bridge_base, &mbus_bridge_end);
+
+	for (i = 0; i < mvebu_mbus_dram_info.num_cs; i++) {
+		struct mbus_dram_window *w;
+		u64 base, size, end;
+
+		w = &mvebu_mbus_dram_info.cs[i];
+		base = w->base;
+		size = w->size;
+		end = base + size;
+
+		/*
+		 * The CS is fully enclosed inside the MBus bridge
+		 * area, so ignore it.
+		 */
+		if (base >= mbus_bridge_base && end <= mbus_bridge_end)
+			continue;
+
+		/*
+		 * Beginning of CS overlaps with end of MBus, raise CS
+		 * base address, and shrink its size.
+		 */
+		if (base >= mbus_bridge_base && end > mbus_bridge_end) {
+			size -= mbus_bridge_end - base;
+			base = mbus_bridge_end;
+		}
+
+		/*
+		 * End of CS overlaps with beginning of MBus, shrink
+		 * CS size.
+		 */
+		if (base < mbus_bridge_base && end > mbus_bridge_base)
+			size -= end - mbus_bridge_base;
+
+		w = &mvebu_mbus_dram_info_nooverlap.cs[cs_nooverlap++];
+		w->cs_index = i;
+		w->mbus_attr = 0xf & ~(1 << i);
+		if (mbus->hw_io_coherency)
+			w->mbus_attr |= ATTR_HW_COHERENCY;
+		w->base = base;
+		w->size = size;
+	}
+
+	mvebu_mbus_dram_info_nooverlap.mbus_dram_target_id = TARGET_DDR;
+	mvebu_mbus_dram_info_nooverlap.num_cs = cs_nooverlap;
+}
+
 static void __init
 mvebu_mbus_default_setup_cpu_target(struct mvebu_mbus_state *mbus)
 {
@@ -964,6 +1080,7 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus,
 		mvebu_mbus_disable_window(mbus, win);
 
 	mbus->soc->setup_cpu_target(mbus);
+	mvebu_mbus_setup_cpu_target_nooverlap(mbus);
 
 	if (is_coherent)
 		writel(UNIT_SYNC_BARRIER_ALL,
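The nooverlap variant boils down to clipping each DRAM chip-select window
against the MBus bridge hole computed above. A standalone sketch of that
clipping (hypothetical helper; windows fully enclosed in the hole are
dropped by the caller, as in the code above):

	/* Shrink [*base, *base + *size) so it no longer intersects the
	 * bridge hole [hole_start, hole_end). */
	static void clip_window(u64 *base, u64 *size,
				u64 hole_start, u64 hole_end)
	{
		u64 end = *base + *size;

		if (*base >= hole_start && end > hole_end) {
			/* window head overlaps hole tail: raise base */
			*size -= hole_end - *base;
			*base = hole_end;
		} else if (*base < hole_start && end > hole_start) {
			/* window tail overlaps hole head: shrink size */
			*size = hole_start - *base;
		}
	}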

drivers/char/random.c (+79, -1)

@@ -409,6 +409,9 @@ static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
 static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
 static struct fasync_struct *fasync;
 
+static DEFINE_SPINLOCK(random_ready_list_lock);
+static LIST_HEAD(random_ready_list);
+
 /**********************************************************************
  *
  * OS independent entropy store.   Here are the functions which handle
@@ -589,6 +592,22 @@ static void fast_mix(struct fast_pool *f)
 	f->count++;
 }
 
+static void process_random_ready_list(void)
+{
+	unsigned long flags;
+	struct random_ready_callback *rdy, *tmp;
+
+	spin_lock_irqsave(&random_ready_list_lock, flags);
+	list_for_each_entry_safe(rdy, tmp, &random_ready_list, list) {
+		struct module *owner = rdy->owner;
+
+		list_del_init(&rdy->list);
+		rdy->func(rdy);
+		module_put(owner);
+	}
+	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+}
+
 /*
  * Credit (or debit) the entropy store with n bits of entropy.
  * Use credit_entropy_bits_safe() if the value comes from userspace
@@ -660,7 +679,8 @@ retry:
 		r->entropy_total = 0;
 		if (r == &nonblocking_pool) {
 			prandom_reseed_late();
-			wake_up_interruptible(&urandom_init_wait);
+			process_random_ready_list();
+			wake_up_all(&urandom_init_wait);
 			pr_notice("random: %s pool is initialized\n", r->name);
 		}
 	}
@@ -1244,6 +1264,64 @@ void get_random_bytes(void *buf, int nbytes)
 }
 EXPORT_SYMBOL(get_random_bytes);
 
+/*
+ * Add a callback function that will be invoked when the nonblocking
+ * pool is initialised.
+ *
+ * returns: 0 if callback is successfully added
+ *	    -EALREADY if pool is already initialised (callback not called)
+ *	    -ENOENT if module for callback is not alive
+ */
+int add_random_ready_callback(struct random_ready_callback *rdy)
+{
+	struct module *owner;
+	unsigned long flags;
+	int err = -EALREADY;
+
+	if (likely(nonblocking_pool.initialized))
+		return err;
+
+	owner = rdy->owner;
+	if (!try_module_get(owner))
+		return -ENOENT;
+
+	spin_lock_irqsave(&random_ready_list_lock, flags);
+	if (nonblocking_pool.initialized)
+		goto out;
+
+	owner = NULL;
+
+	list_add(&rdy->list, &random_ready_list);
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+
+	module_put(owner);
+
+	return err;
+}
+EXPORT_SYMBOL(add_random_ready_callback);
+
+/*
+ * Delete a previously registered readiness callback function.
+ */
+void del_random_ready_callback(struct random_ready_callback *rdy)
+{
+	unsigned long flags;
+	struct module *owner = NULL;
+
+	spin_lock_irqsave(&random_ready_list_lock, flags);
+	if (!list_empty(&rdy->list)) {
+		list_del_init(&rdy->list);
+		owner = rdy->owner;
+	}
+	spin_unlock_irqrestore(&random_ready_list_lock, flags);
+
+	module_put(owner);
+}
+EXPORT_SYMBOL(del_random_ready_callback);
+
 /*
  * This function will use the architecture-specific hardware random
  * number generator if it is available.  The arch-specific hw RNG will

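A minimal sketch of how a consumer (the crypto DRBG in this series) is
expected to use the new readiness hooks; the module here is hypothetical:

	#include <linux/module.h>
	#include <linux/random.h>

	static void my_seed_ready(struct random_ready_callback *rdy)
	{
		/* Nonblocking pool is initialised; safe to pull seed
		 * material with get_random_bytes() from here. */
	}

	static struct random_ready_callback my_rdy = {
		/* Pre-initialised list makes the del below safe even if
		 * the callback was never added. */
		.list  = LIST_HEAD_INIT(my_rdy.list),
		.func  = my_seed_ready,
		.owner = THIS_MODULE,
	};

	static int __init my_module_init(void)
	{
		int err = add_random_ready_callback(&my_rdy);

		if (err == -EALREADY)
			return 0;	/* pool already up: seed now */
		return err;		/* 0 = callback pending */
	}

	static void __exit my_module_exit(void)
	{
		del_random_ready_callback(&my_rdy);
	}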
drivers/crypto/Kconfig (+54, -33)

@@ -162,10 +162,10 @@ config CRYPTO_GHASH_S390
 config CRYPTO_DEV_MV_CESA
 	tristate "Marvell's Cryptographic Engine"
 	depends on PLAT_ORION
-	select CRYPTO_ALGAPI
 	select CRYPTO_AES
-	select CRYPTO_BLKCIPHER2
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
+	select SRAM
 	help
 	  This driver allows you to utilize the Cryptographic Engines and
 	  Security Accelerator (CESA) which can be found on the Marvell Orion
@@ -173,10 +173,27 @@ config CRYPTO_DEV_MV_CESA
 
 	  Currently the driver supports AES in ECB and CBC mode without DMA.
 
+config CRYPTO_DEV_MARVELL_CESA
+	tristate "New Marvell's Cryptographic Engine driver"
+	depends on PLAT_ORION || ARCH_MVEBU
+	select CRYPTO_AES
+	select CRYPTO_DES
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_HASH
+	select SRAM
+	help
+	  This driver allows you to utilize the Cryptographic Engines and
+	  Security Accelerator (CESA) which can be found on the Armada 370.
+	  This driver supports CPU offload through DMA transfers.
+
+	  This driver is aimed at replacing the mv_cesa driver. This will only
+	  happen once it has received proper testing.
+
 config CRYPTO_DEV_NIAGARA2
        tristate "Niagara2 Stream Processing Unit driver"
        select CRYPTO_DES
-       select CRYPTO_ALGAPI
+       select CRYPTO_BLKCIPHER
+       select CRYPTO_HASH
        depends on SPARC64
        help
 	  Each core of a Niagara2 processor contains a Stream
@@ -189,7 +206,6 @@ config CRYPTO_DEV_NIAGARA2
 config CRYPTO_DEV_HIFN_795X
 	tristate "Driver HIFN 795x crypto accelerator chips"
 	select CRYPTO_DES
-	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	select HW_RANDOM if CRYPTO_DEV_HIFN_795X_RNG
 	depends on PCI
@@ -208,8 +224,10 @@ source drivers/crypto/caam/Kconfig
 
 config CRYPTO_DEV_TALITOS
 	tristate "Talitos Freescale Security Engine (SEC)"
-	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_HASH
 	select HW_RANDOM
 	depends on FSL_SOC
 	help
@@ -222,11 +240,29 @@ config CRYPTO_DEV_TALITOS
 	  To compile this driver as a module, choose M here: the module
 	  will be called talitos.
 
+config CRYPTO_DEV_TALITOS1
+	bool "SEC1 (SEC 1.0 and SEC Lite 1.2)"
+	depends on CRYPTO_DEV_TALITOS
+	depends on PPC_8xx || PPC_82xx
+	default y
+	help
+	  Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0
+	  found on MPC82xx or the Freescale Security Engine (SEC Lite)
+	  version 1.2 found on MPC8xx.
+
+config CRYPTO_DEV_TALITOS2
+	bool "SEC2+ (SEC version 2.0 or upper)"
+	depends on CRYPTO_DEV_TALITOS
+	default y if !PPC_8xx
+	help
+	  Say 'Y' here to use the Freescale Security Engine (SEC)
+	  version 2 and later, as found on MPC83xx, MPC85xx, etc.
+
 config CRYPTO_DEV_IXP4XX
 	tristate "Driver for IXP4xx crypto hardware acceleration"
 	depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
 	select CRYPTO_DES
-	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
 	help
@@ -236,7 +272,6 @@ config CRYPTO_DEV_PPC4XX
 	tristate "Driver AMCC PPC4xx crypto accelerator"
 	depends on PPC && 4xx
 	select CRYPTO_HASH
-	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	help
 	  This option allows you to have support for AMCC crypto acceleration.
@@ -257,7 +292,7 @@ config CRYPTO_DEV_OMAP_AES
 	tristate "Support for OMAP AES hw engine"
 	depends on ARCH_OMAP2 || ARCH_OMAP3 || ARCH_OMAP2PLUS
 	select CRYPTO_AES
-	select CRYPTO_BLKCIPHER2
+	select CRYPTO_BLKCIPHER
 	help
 	  OMAP processors have AES module accelerator. Select this if you
 	  want to use the OMAP module for AES algorithms.
@@ -266,7 +301,7 @@ config CRYPTO_DEV_OMAP_DES
 	tristate "Support for OMAP DES3DES hw engine"
 	depends on ARCH_OMAP2PLUS
 	select CRYPTO_DES
-	select CRYPTO_BLKCIPHER2
+	select CRYPTO_BLKCIPHER
 	help
 	  OMAP processors have DES/3DES module accelerator. Select this if you
 	  want to use the OMAP module for DES and 3DES algorithms. Currently
@@ -276,9 +311,10 @@ config CRYPTO_DEV_OMAP_DES
 config CRYPTO_DEV_PICOXCELL
 	tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
 	depends on ARCH_PICOXCELL && HAVE_CLK
+	select CRYPTO_AEAD
 	select CRYPTO_AES
 	select CRYPTO_AUTHENC
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_DES
 	select CRYPTO_CBC
 	select CRYPTO_ECB
@@ -304,7 +340,6 @@ config CRYPTO_DEV_S5P
 	tristate "Support for Samsung S5PV210/Exynos crypto accelerator"
 	depends on ARCH_S5PV210 || ARCH_EXYNOS
 	select CRYPTO_AES
-	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	help
 	  This option allows you to have support for S5P crypto acceleration.
@@ -312,11 +347,13 @@ config CRYPTO_DEV_S5P
 	  algorithms execution.
 
 config CRYPTO_DEV_NX
-	bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
-	depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN
-	default n
+	bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration"
+	depends on PPC64
 	help
-	  Support for Power7+ in-Nest cryptographic acceleration.
+	  This enables support for the NX hardware cryptographic accelerator
+	  coprocessor that is in IBM PowerPC P7+ or later processors.  This
+	  does not actually enable any drivers; it only allows you to select
+	  which acceleration type (encryption and/or compression) to enable.
 
 if CRYPTO_DEV_NX
 	source "drivers/crypto/nx/Kconfig"
@@ -325,7 +362,6 @@ endif
 config CRYPTO_DEV_UX500
 	tristate "Driver for ST-Ericsson UX500 crypto hardware acceleration"
 	depends on ARCH_U8500
-	select CRYPTO_ALGAPI
 	help
 	  Driver for ST-Ericsson UX500 crypto engine.
 
@@ -343,10 +379,7 @@ config CRYPTO_DEV_BFIN_CRC
 config CRYPTO_DEV_ATMEL_AES
 	tristate "Support for Atmel AES hw accelerator"
 	depends on ARCH_AT91
-	select CRYPTO_CBC
-	select CRYPTO_ECB
 	select CRYPTO_AES
-	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	select AT_HDMAC
 	help
@@ -361,9 +394,6 @@ config CRYPTO_DEV_ATMEL_TDES
 	tristate "Support for Atmel DES/TDES hw accelerator"
 	depends on ARCH_AT91
 	select CRYPTO_DES
-	select CRYPTO_CBC
-	select CRYPTO_ECB
-	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	help
 	  Some Atmel processors have DES/TDES hw accelerator.
@@ -376,10 +406,7 @@ config CRYPTO_DEV_ATMEL_TDES
 config CRYPTO_DEV_ATMEL_SHA
 	tristate "Support for Atmel SHA hw accelerator"
 	depends on ARCH_AT91
-	select CRYPTO_SHA1
-	select CRYPTO_SHA256
-	select CRYPTO_SHA512
-	select CRYPTO_ALGAPI
+	select CRYPTO_HASH
 	help
 	  Some Atmel processors have SHA1/SHA224/SHA256/SHA384/SHA512
 	  hw accelerator.
@@ -392,7 +419,6 @@ config CRYPTO_DEV_ATMEL_SHA
 config CRYPTO_DEV_CCP
 	bool "Support for AMD Cryptographic Coprocessor"
 	depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
-	default n
 	help
 	  The AMD Cryptographic Coprocessor provides hardware support
 	  for encryption, hashing and related operations.
@@ -404,13 +430,11 @@ endif
 config CRYPTO_DEV_MXS_DCP
 	tristate "Support for Freescale MXS DCP"
 	depends on ARCH_MXS
-	select CRYPTO_SHA1
-	select CRYPTO_SHA256
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_AES
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_ALGAPI
+	select CRYPTO_HASH
 	help
 	  The Freescale i.MX23/i.MX28 has SHA1/SHA256 and AES128 CBC/ECB
 	  co-processor on the die.
@@ -429,7 +453,6 @@ config CRYPTO_DEV_QCE
 	select CRYPTO_CBC
 	select CRYPTO_XTS
 	select CRYPTO_CTR
-	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	help
 	  This driver supports Qualcomm crypto engine accelerator
@@ -439,7 +462,6 @@ config CRYPTO_DEV_QCE
 config CRYPTO_DEV_VMX
 	bool "Support for VMX cryptographic acceleration instructions"
 	depends on PPC64
-	default n
 	help
 	  Support for VMX cryptographic acceleration instructions.
 
@@ -449,7 +471,6 @@ config CRYPTO_DEV_IMGTEC_HASH
 	tristate "Imagination Technologies hardware hash accelerator"
 	depends on MIPS || COMPILE_TEST
 	depends on HAS_DMA
-	select CRYPTO_ALGAPI
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256

drivers/crypto/Makefile (+1, -0)

@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
 obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
 obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
+obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/
 obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
 obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
 n2_crypto-y := n2_core.o n2_asm.o

drivers/crypto/caam/Kconfig (+2, -3)

@@ -45,7 +45,6 @@ config CRYPTO_DEV_FSL_CAAM_RINGSIZE
 config CRYPTO_DEV_FSL_CAAM_INTC
 	bool "Job Ring interrupt coalescing"
 	depends on CRYPTO_DEV_FSL_CAAM_JR
-	default n
 	help
 	  Enable the Job Ring's interrupt coalescing feature.
 
@@ -77,8 +76,9 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 	tristate "Register algorithm implementations with the Crypto API"
 	depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
 	default y
-	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
+	select CRYPTO_BLKCIPHER
 	help
 	  Selecting this will offload crypto for users of the
 	  scatterlist crypto API (such as the linux native IPSec
@@ -115,7 +115,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API
 config CRYPTO_DEV_FSL_CAAM_DEBUG
 	bool "Enable debug output in CAAM driver"
 	depends on CRYPTO_DEV_FSL_CAAM
-	default n
 	help
 	  Selecting this will enable printing of various debug
 	  information in the CAAM driver.

drivers/crypto/caam/caamalg.c (+771, -726)

@@ -65,6 +65,10 @@
 /* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
 #define CAAM_MAX_IV_LENGTH		16
 
+#define AEAD_DESC_JOB_IO_LEN		(DESC_JOB_IO_LEN + CAAM_CMD_SZ * 2)
+#define GCM_DESC_JOB_IO_LEN		(AEAD_DESC_JOB_IO_LEN + \
+					 CAAM_CMD_SZ * 4)
+
 /* length of descriptors text */
 #define DESC_AEAD_BASE			(4 * CAAM_CMD_SZ)
 #define DESC_AEAD_ENC_LEN		(DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
@@ -79,18 +83,16 @@
 #define DESC_AEAD_NULL_DEC_LEN		(DESC_AEAD_NULL_BASE + 17 * CAAM_CMD_SZ)
 
 #define DESC_GCM_BASE			(3 * CAAM_CMD_SZ)
-#define DESC_GCM_ENC_LEN		(DESC_GCM_BASE + 23 * CAAM_CMD_SZ)
-#define DESC_GCM_DEC_LEN		(DESC_GCM_BASE + 19 * CAAM_CMD_SZ)
+#define DESC_GCM_ENC_LEN		(DESC_GCM_BASE + 16 * CAAM_CMD_SZ)
+#define DESC_GCM_DEC_LEN		(DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
 
 #define DESC_RFC4106_BASE		(3 * CAAM_CMD_SZ)
-#define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 15 * CAAM_CMD_SZ)
-#define DESC_RFC4106_DEC_LEN		(DESC_RFC4106_BASE + 14 * CAAM_CMD_SZ)
-#define DESC_RFC4106_GIVENC_LEN		(DESC_RFC4106_BASE + 21 * CAAM_CMD_SZ)
+#define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 10 * CAAM_CMD_SZ)
+#define DESC_RFC4106_DEC_LEN		(DESC_RFC4106_BASE + 10 * CAAM_CMD_SZ)
 
 #define DESC_RFC4543_BASE		(3 * CAAM_CMD_SZ)
-#define DESC_RFC4543_ENC_LEN		(DESC_RFC4543_BASE + 25 * CAAM_CMD_SZ)
-#define DESC_RFC4543_DEC_LEN		(DESC_RFC4543_BASE + 27 * CAAM_CMD_SZ)
-#define DESC_RFC4543_GIVENC_LEN		(DESC_RFC4543_BASE + 30 * CAAM_CMD_SZ)
+#define DESC_RFC4543_ENC_LEN		(DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
+#define DESC_RFC4543_DEC_LEN		(DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ)
 
 #define DESC_ABLKCIPHER_BASE		(3 * CAAM_CMD_SZ)
 #define DESC_ABLKCIPHER_ENC_LEN		(DESC_ABLKCIPHER_BASE + \
@@ -98,8 +100,7 @@
 #define DESC_ABLKCIPHER_DEC_LEN		(DESC_ABLKCIPHER_BASE + \
 					 15 * CAAM_CMD_SZ)
 
-#define DESC_MAX_USED_BYTES		(DESC_RFC4543_GIVENC_LEN + \
-					 CAAM_MAX_KEY_SIZE)
+#define DESC_MAX_USED_BYTES		(CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN)
 #define DESC_MAX_USED_LEN		(DESC_MAX_USED_BYTES / CAAM_CMD_SZ)
 
 #ifdef DEBUG
@@ -258,7 +259,7 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx,
 
 static int aead_null_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
@@ -273,7 +274,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 	    ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
-	/* aead_encrypt shared descriptor */
+	/* old_aead_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
 
 	init_sh_desc(desc, HDR_SHARE_SERIAL);
@@ -362,7 +363,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 	desc = ctx->sh_desc_dec;
 
-	/* aead_decrypt shared descriptor */
+	/* old_aead_decrypt shared descriptor */
 	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Skip if already shared */
@@ -383,7 +384,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 	/* assoclen + cryptlen = seqinlen - ivsize - authsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
+				ctx->authsize + ivsize);
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
@@ -449,7 +450,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 static int aead_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct crypto_tfm *ctfm = crypto_aead_tfm(aead);
 	const char *alg_name = crypto_tfm_alg_name(ctfm);
@@ -496,7 +497,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	    CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
-	/* aead_encrypt shared descriptor */
+	/* old_aead_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
 
 	/* Note: Context registers are saved. */
@@ -510,7 +511,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
 
 	/* assoclen + cryptlen = seqinlen - ivsize */
-	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
+	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize);
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ);
@@ -518,7 +519,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	/* read assoc before reading payload */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
 			     KEY_VLF);
-	aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off);
+	aead_append_ld_iv(desc, ivsize, ctx1_iv_off);
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
@@ -565,7 +566,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	    CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
-	/* aead_decrypt shared descriptor */
+	/* old_aead_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
 
 	/* Note: Context registers are saved. */
@@ -577,7 +578,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 	/* assoclen + cryptlen = seqinlen - ivsize - authsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
+				ctx->authsize + ivsize);
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
@@ -586,7 +587,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
 			     KEY_VLF);
 
-	aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off);
+	aead_append_ld_iv(desc, ivsize, ctx1_iv_off);
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
@@ -645,20 +646,20 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	/* Generate IV */
 	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
 		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-		NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
+		NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT);
 	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
 			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
 	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
 	append_move(desc, MOVE_WAITCOMP |
 		    MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX |
 		    (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+		    (ivsize << MOVE_LEN_SHIFT));
 	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
 
 	/* Copy IV to class 1 context */
 	append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO |
 		    (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+		    (ivsize << MOVE_LEN_SHIFT));
 
 	/* Return to encryption */
 	append_operation(desc, ctx->class2_alg_type |
@@ -676,10 +677,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 	/* Copy iv from outfifo to class 2 fifo */
 	moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 |
-		 NFIFOENTRY_DTYPE_MSG | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
+		 NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT);
 	append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB |
 			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-	append_load_imm_u32(desc, tfm->ivsize, LDST_CLASS_2_CCB |
+	append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB |
 			    LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM);
 
 	/* Load Counter into CONTEXT1 reg */
@@ -698,7 +699,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 
 	/* No need to reload iv */
-	append_seq_fifo_load(desc, tfm->ivsize,
+	append_seq_fifo_load(desc, ivsize,
 			     FIFOLD_CLASS_SKIP);
 
 	/* Will read cryptlen */
@@ -738,7 +739,6 @@ static int aead_setauthsize(struct crypto_aead *authenc,
 
 static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
@@ -754,7 +754,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptor
 	 * must fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_GCM_ENC_LEN + DESC_JOB_IO_LEN +
+	if (DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN +
 	    ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
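The keys_fit_inline decision recurs throughout caamalg.c: the key is
inlined in the shared descriptor only if descriptor text, job I/O commands
and key together fit the 64-word descriptor buffer, now budgeted with
GCM_DESC_JOB_IO_LEN for the GCM-family algorithms. In isolation
(hypothetical helper using the constants defined above):

	static bool gcm_enc_keys_fit_inline(unsigned int enckeylen)
	{
		return DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN + enckeylen <=
		       CAAM_DESC_BYTES_MAX;
	}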
@@ -777,34 +777,34 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	append_operation(desc, ctx->class1_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
 
-	/* cryptlen = seqoutlen - authsize */
-	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+	/* if assoclen + cryptlen is ZERO, skip to ICV write */
+	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
+						 JUMP_COND_MATH_Z);
 
-	/* assoclen + cryptlen = seqinlen - ivsize */
-	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
+	/* if assoclen is ZERO, skip reading the assoc data */
+	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
+						 JUMP_COND_MATH_Z);
 
-	/* assoclen = (assoclen + cryptlen) - cryptlen */
-	append_math_sub(desc, REG1, REG2, REG3, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+	/* skip assoc data */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+	/* cryptlen = seqinlen - assoclen */
+	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
 
 	/* if cryptlen is ZERO jump to zero-payload commands */
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 	zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
 					    JUMP_COND_MATH_Z);
-	/* read IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
-			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
-
-	/* if assoclen is ZERO, skip reading the assoc data */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ);
-	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
-					   JUMP_COND_MATH_Z);
 
 	/* read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd1);
 
-	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 
 	/* write encrypted data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
@@ -814,31 +814,17 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
 
 	/* jump over the zero-payload commands */
-	append_jump(desc, JUMP_TEST_ALL | 7);
+	append_jump(desc, JUMP_TEST_ALL | 2);
 
 	/* zero-payload commands */
 	set_jump_tgt_here(desc, zero_payload_jump_cmd);
 
-	/* if assoclen is ZERO, jump to IV reading - is the only input data */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ);
-	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
-					   JUMP_COND_MATH_Z);
-	/* read IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
-			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
-
 	/* read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
 
-	/* jump to ICV writing */
-	append_jump(desc, JUMP_TEST_ALL | 2);
-
-	/* read IV - is the only input data */
+	/* There is no input data */
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
-			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 |
-			     FIFOLD_TYPE_LAST1);
 
 	/* write ICV */
 	append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB |
@@ -862,7 +848,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
 	keys_fit_inline = false;
-	if (DESC_GCM_DEC_LEN + DESC_JOB_IO_LEN +
+	if (DESC_GCM_DEC_LEN + GCM_DESC_JOB_IO_LEN +
 	    ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
@@ -886,33 +872,30 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	append_operation(desc, ctx->class1_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-	/* assoclen + cryptlen = seqinlen - ivsize - icvsize */
-	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
-
-	/* assoclen = (assoclen + cryptlen) - cryptlen */
-	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-	append_math_sub(desc, REG1, REG3, REG2, CAAM_CMD_SZ);
+	/* if assoclen is ZERO, skip reading the assoc data */
+	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
+						 JUMP_COND_MATH_Z);
 
-	/* read IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
-			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
-	/* jump to zero-payload command if cryptlen is zero */
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ);
-	zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
-					    JUMP_COND_MATH_Z);
+	/* skip assoc data */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
 
-	append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ);
-	/* if asoclen is ZERO, skip reading assoc data */
-	zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
-					   JUMP_COND_MATH_Z);
 	/* read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
+
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd1);
 
-	append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
+	/* cryptlen = seqoutlen - assoclen */
+	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+	/* jump to zero-payload command if cryptlen is zero */
+	zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
+					    JUMP_COND_MATH_Z);
+
+	append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
 	/* store encrypted data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
@@ -921,21 +904,9 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
 
-	/* jump the zero-payload commands */
-	append_jump(desc, JUMP_TEST_ALL | 4);
-
 	/* zero-payload command */
 	set_jump_tgt_here(desc, zero_payload_jump_cmd);
 
-	/* if assoclen is ZERO, jump to ICV reading */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ);
-	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
-					   JUMP_COND_MATH_Z);
-	/* read assoc data */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
-	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
-
 	/* read ICV */
 	append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
@@ -968,13 +939,11 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 
 static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
-	u32 *key_jump_cmd, *move_cmd, *write_iv_cmd;
+	u32 *key_jump_cmd;
 	u32 *desc;
-	u32 geniv;
 
 	if (!ctx->enckeylen || !ctx->authsize)
 		return 0;
@@ -984,7 +953,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptor
 	 * must fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_RFC4106_ENC_LEN + DESC_JOB_IO_LEN +
+	if (DESC_RFC4106_ENC_LEN + GCM_DESC_JOB_IO_LEN +
 	    ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
@@ -1007,29 +976,21 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	append_operation(desc, ctx->class1_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
 
-	/* cryptlen = seqoutlen - authsize */
-	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
-	/* assoclen + cryptlen = seqinlen - ivsize */
-	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
-
-	/* assoclen = (assoclen + cryptlen) - cryptlen */
-	append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ);
-
-	/* Read Salt */
-	append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen),
-				4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV);
-	/* Read AES-GCM-ESP IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
-			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	/* Skip assoc data */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
 
 	/* Read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
+	/* cryptlen = seqoutlen - assoclen */
+	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
 	/* Will read cryptlen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 
 	/* Write encrypted data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
@@ -1083,30 +1044,21 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	append_operation(desc, ctx->class1_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-	/* assoclen + cryptlen = seqinlen - ivsize - icvsize */
-	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
-
-	/* assoclen = (assoclen + cryptlen) - cryptlen */
-	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
-
-	/* Will write cryptlen bytes */
-	append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
-	/* Read Salt */
-	append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen),
-				4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV);
-	/* Read AES-GCM-ESP IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
-			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
+	/* Skip assoc data */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
 
 	/* Read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
+	/* Will write cryptlen bytes */
+	append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
 	/* Will read cryptlen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
+	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
 	/* Store payload data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
@@ -1132,107 +1084,6 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 		       desc_bytes(desc), 1);
 #endif
 
-	/*
-	 * Job Descriptor and Shared Descriptors
-	 * must all fit into the 64-word Descriptor h/w Buffer
-	 */
-	keys_fit_inline = false;
-	if (DESC_RFC4106_GIVENC_LEN + DESC_JOB_IO_LEN +
-	    ctx->split_key_pad_len + ctx->enckeylen <=
-	    CAAM_DESC_BYTES_MAX)
-		keys_fit_inline = true;
-
-	/* rfc4106_givencrypt shared descriptor */
-	desc = ctx->sh_desc_givenc;
-
-	init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-	/* Skip key loading if it is loaded due to sharing */
-	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD);
-	if (keys_fit_inline)
-		append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-				  ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-	else
-		append_key(desc, ctx->key_dma, ctx->enckeylen,
-			   CLASS_1 | KEY_DEST_CLASS_REG);
-	set_jump_tgt_here(desc, key_jump_cmd);
-
-	/* Generate IV */
-	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
-		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-		NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
-	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
-			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-	move_cmd = append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_DESCBUF |
-			       (tfm->ivsize << MOVE_LEN_SHIFT));
-	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
-
-	/* Copy generated IV to OFIFO */
-	write_iv_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_OUTFIFO |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
-
-	/* Class 1 operation */
-	append_operation(desc, ctx->class1_alg_type |
-			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-	/* ivsize + cryptlen = seqoutlen - authsize */
-	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
-	/* assoclen = seqinlen - (ivsize + cryptlen) */
-	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
-
-	/* Will write ivsize + cryptlen */
-	append_math_add(desc, VARSEQOUTLEN, REG3, REG0, CAAM_CMD_SZ);
-
-	/* Read Salt and generated IV */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV |
-		   FIFOLD_TYPE_FLUSH1 | IMMEDIATE | 12);
-	/* Append Salt */
-	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
-	set_move_tgt_here(desc, move_cmd);
-	set_move_tgt_here(desc, write_iv_cmd);
-	/* Blank commands. Will be overwritten by generated IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
-
-	/* No need to reload iv */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP);
-
-	/* Read assoc data */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
-
-	/* Will read cryptlen */
-	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-	/* Store generated IV and encrypted data */
-	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
-
-	/* Read payload data */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
-
-	/* Write ICV */
-	append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB |
-			 LDST_SRCDST_BYTE_CONTEXT);
-
-	ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc,
-						 desc_bytes(desc),
-						 DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
-		dev_err(jrdev, "unable to map shared descriptor\n");
-		return -ENOMEM;
-	}
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR,
-		       "rfc4106 givenc shdesc@"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
-		       desc_bytes(desc), 1);
-#endif
-
 	return 0;
 }
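
The deleted rfc4106 givencrypt descriptor gets no replacement in this
file: under the new AEAD interface the driver never generates IVs
itself (a generic template such as seqiv can supply them), so every
request arrives with the 8-byte explicit IV already in req->iv. A
rough sketch of the new request layout, using hypothetical buffer
names:

    /* Sketch only: new single-SG rfc4106 request. src_sg covers the
     * associated data followed by the text; the explicit IV is no
     * longer part of the scatterlist.
     */
    aead_request_set_crypt(req, src_sg, dst_sg, cryptlen, iv8);
    aead_request_set_ad(req, assoclen);  /* AD is a prefix of src_sg */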
 
@@ -1249,14 +1100,12 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc,
 
 static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
-	u32 *key_jump_cmd, *write_iv_cmd, *write_aad_cmd;
+	u32 *key_jump_cmd;
 	u32 *read_move_cmd, *write_move_cmd;
 	u32 *desc;
-	u32 geniv;
 
 	if (!ctx->enckeylen || !ctx->authsize)
 		return 0;
@@ -1266,7 +1115,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptor
 	 * must fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_RFC4543_ENC_LEN + DESC_JOB_IO_LEN +
+	if (DESC_RFC4543_ENC_LEN + GCM_DESC_JOB_IO_LEN +
 	    ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
@@ -1289,48 +1138,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	append_operation(desc, ctx->class1_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
 
-	/* Load AES-GMAC ESP IV into Math1 register */
-	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 |
-		   LDST_CLASS_DECO | tfm->ivsize);
-
-	/* Wait the DMA transaction to finish */
-	append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM |
-		    (1 << JUMP_OFFSET_SHIFT));
-
-	/* Overwrite blank immediate AES-GMAC ESP IV data */
-	write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
-
-	/* Overwrite blank immediate AAD data */
-	write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				    (tfm->ivsize << MOVE_LEN_SHIFT));
-
-	/* cryptlen = seqoutlen - authsize */
-	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
-	/* assoclen = (seqinlen - ivsize) - cryptlen */
-	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
-
-	/* Read Salt and AES-GMAC ESP IV */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize));
-	/* Append Salt */
-	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
-	set_move_tgt_here(desc, write_iv_cmd);
-	/* Blank commands. Will be overwritten by AES-GMAC ESP IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
-
-	/* Read assoc data */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-			     FIFOLD_TYPE_AAD);
-
-	/* Will read cryptlen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-	/* Will write cryptlen bytes */
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	/* assoclen + cryptlen = seqinlen */
+	append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
 
 	/*
 	 * MOVE_LEN opcode is not available in all SEC HW revisions,
@@ -1342,16 +1151,13 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
 				     (0x8 << MOVE_LEN_SHIFT));
 
-	/* Authenticate AES-GMAC ESP IV  */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_AAD | tfm->ivsize);
-	set_move_tgt_here(desc, write_aad_cmd);
-	/* Blank commands. Will be overwritten by AES-GMAC ESP IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
+	/* Will read assoclen + cryptlen bytes */
+	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 
-	/* Read and write cryptlen bytes */
+	/* Will write assoclen + cryptlen bytes */
+	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+	/* Read and write assoclen + cryptlen bytes */
 	aead_append_src_dst(desc, FIFOLD_TYPE_AAD);
 
 	set_move_tgt_here(desc, read_move_cmd);
@@ -1382,7 +1188,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
 	keys_fit_inline = false;
-	if (DESC_RFC4543_DEC_LEN + DESC_JOB_IO_LEN +
+	if (DESC_RFC4543_DEC_LEN + GCM_DESC_JOB_IO_LEN +
 	    ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
 		keys_fit_inline = true;
 
@@ -1405,28 +1211,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	append_operation(desc, ctx->class1_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-	/* Load AES-GMAC ESP IV into Math1 register */
-	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 |
-		   LDST_CLASS_DECO | tfm->ivsize);
-
-	/* Wait the DMA transaction to finish */
-	append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM |
-		    (1 << JUMP_OFFSET_SHIFT));
-
-	/* assoclen + cryptlen = (seqinlen - ivsize) - icvsize */
-	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, ctx->authsize);
-
-	/* Overwrite blank immediate AES-GMAC ESP IV data */
-	write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
-
-	/* Overwrite blank immediate AAD data */
-	write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				    (tfm->ivsize << MOVE_LEN_SHIFT));
-
-	/* assoclen = (assoclen + cryptlen) - cryptlen */
-	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
+	/* assoclen + cryptlen = seqoutlen */
+	append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
 	/*
 	 * MOVE_LEN opcode is not available in all SEC HW revisions,
@@ -1438,40 +1224,16 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
 				     (0x8 << MOVE_LEN_SHIFT));
 
-	/* Read Salt and AES-GMAC ESP IV */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize));
-	/* Append Salt */
-	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
-	set_move_tgt_here(desc, write_iv_cmd);
-	/* Blank commands. Will be overwritten by AES-GMAC ESP IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
-
-	/* Read assoc data */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-			     FIFOLD_TYPE_AAD);
-
-	/* Will read cryptlen bytes */
-	append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
-
-	/* Will write cryptlen bytes */
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ);
+	/* Will read assoclen + cryptlen bytes */
+	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
-	/* Authenticate AES-GMAC ESP IV  */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_AAD | tfm->ivsize);
-	set_move_tgt_here(desc, write_aad_cmd);
-	/* Blank commands. Will be overwritten by AES-GMAC ESP IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
+	/* Will write assoclen + cryptlen bytes */
+	append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
 	/* Store payload data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
 
-	/* In-snoop cryptlen data */
+	/* In-snoop assoclen + cryptlen data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1);
 
@@ -1499,156 +1261,27 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 		       desc_bytes(desc), 1);
 #endif
 
-	/*
-	 * Job Descriptor and Shared Descriptors
-	 * must all fit into the 64-word Descriptor h/w Buffer
-	 */
-	keys_fit_inline = false;
-	if (DESC_RFC4543_GIVENC_LEN + DESC_JOB_IO_LEN +
-	    ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-		keys_fit_inline = true;
-
-	/* rfc4543_givencrypt shared descriptor */
-	desc = ctx->sh_desc_givenc;
-
-	init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-	/* Skip key loading if it is loaded due to sharing */
-	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD);
-	if (keys_fit_inline)
-		append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-				  ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-	else
-		append_key(desc, ctx->key_dma, ctx->enckeylen,
-			   CLASS_1 | KEY_DEST_CLASS_REG);
-	set_jump_tgt_here(desc, key_jump_cmd);
-
-	/* Generate IV */
-	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
-		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-		NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
-	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
-			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-	/* Move generated IV to Math1 register */
-	append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_MATH1 |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
-	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+	return 0;
+}
 
-	/* Overwrite blank immediate AES-GMAC IV data */
-	write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
+static int rfc4543_setauthsize(struct crypto_aead *authenc,
+			       unsigned int authsize)
+{
+	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
 
-	/* Overwrite blank immediate AAD data */
-	write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				    (tfm->ivsize << MOVE_LEN_SHIFT));
+	ctx->authsize = authsize;
+	rfc4543_set_sh_desc(authenc);
 
-	/* Copy generated IV to OFIFO */
-	append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_OUTFIFO |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+	return 0;
+}
 
-	/* Class 1 operation */
-	append_operation(desc, ctx->class1_alg_type |
-			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-	/* ivsize + cryptlen = seqoutlen - authsize */
-	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
-	/* assoclen = seqinlen - (ivsize + cryptlen) */
-	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
-
-	/* Will write ivsize + cryptlen */
-	append_math_add(desc, VARSEQOUTLEN, REG3, REG0, CAAM_CMD_SZ);
-
-	/*
-	 * MOVE_LEN opcode is not available in all SEC HW revisions,
-	 * thus need to do some magic, i.e. self-patch the descriptor
-	 * buffer.
-	 */
-	read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
-				    (0x6 << MOVE_LEN_SHIFT));
-	write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
-				     (0x8 << MOVE_LEN_SHIFT));
-
-	/* Read Salt and AES-GMAC generated IV */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize));
-	/* Append Salt */
-	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
-	set_move_tgt_here(desc, write_iv_cmd);
-	/* Blank commands. Will be overwritten by AES-GMAC generated IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
-
-	/* No need to reload iv */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP);
-
-	/* Read assoc data */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-			     FIFOLD_TYPE_AAD);
-
-	/* Will read cryptlen */
-	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-	/* Authenticate AES-GMAC IV  */
-	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_AAD | tfm->ivsize);
-	set_move_tgt_here(desc, write_aad_cmd);
-	/* Blank commands. Will be overwritten by AES-GMAC IV. */
-	append_cmd(desc, 0x00000000);
-	append_cmd(desc, 0x00000000);
-	/* End of blank commands */
-
-	/* Read and write cryptlen bytes */
-	aead_append_src_dst(desc, FIFOLD_TYPE_AAD);
-
-	set_move_tgt_here(desc, read_move_cmd);
-	set_move_tgt_here(desc, write_move_cmd);
-	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-	/* Move payload data to OFIFO */
-	append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO);
-
-	/* Write ICV */
-	append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB |
-			 LDST_SRCDST_BYTE_CONTEXT);
-
-	ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc,
-						 desc_bytes(desc),
-						 DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
-		dev_err(jrdev, "unable to map shared descriptor\n");
-		return -ENOMEM;
-	}
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR,
-		       "rfc4543 givenc shdesc@"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, desc,
-		       desc_bytes(desc), 1);
-#endif
-
-	return 0;
-}
-
-static int rfc4543_setauthsize(struct crypto_aead *authenc,
-			       unsigned int authsize)
-{
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
-
-	ctx->authsize = authsize;
-	rfc4543_set_sh_desc(authenc);
-
-	return 0;
-}
-
-static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in,
-			      u32 authkeylen)
-{
-	return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
-			       ctx->split_key_pad_len, key_in, authkeylen,
-			       ctx->alg_op);
-}
+static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in,
+			      u32 authkeylen)
+{
+	return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
+			       ctx->split_key_pad_len, key_in, authkeylen,
+			       ctx->alg_op);
+}
 
 static int aead_setkey(struct crypto_aead *aead,
 			       const u8 *key, unsigned int keylen)
@@ -2100,7 +1733,7 @@ struct aead_edesc {
 	int sec4_sg_bytes;
 	dma_addr_t sec4_sg_dma;
 	struct sec4_sg_entry *sec4_sg;
-	u32 hw_desc[0];
+	u32 hw_desc[];
 };
 
 /*
@@ -2153,6 +1786,16 @@ static void caam_unmap(struct device *dev, struct scatterlist *src,
 static void aead_unmap(struct device *dev,
 		       struct aead_edesc *edesc,
 		       struct aead_request *req)
+{
+	caam_unmap(dev, req->src, req->dst,
+		   edesc->src_nents, edesc->src_chained, edesc->dst_nents,
+		   edesc->dst_chained, 0, 0,
+		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
+}
+
+static void old_aead_unmap(struct device *dev,
+			   struct aead_edesc *edesc,
+			   struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	int ivsize = crypto_aead_ivsize(aead);
@@ -2184,6 +1827,28 @@ static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 {
 	struct aead_request *req = context;
 	struct aead_edesc *edesc;
+
+#ifdef DEBUG
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
+
+	if (err)
+		caam_jr_strstatus(jrdev, err);
+
+	aead_unmap(jrdev, edesc, req);
+
+	kfree(edesc);
+
+	aead_request_complete(req, err);
+}
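
Both the new and the old completion callbacks recover the extended
descriptor from the raw descriptor pointer. This works because
hw_desc is the trailing flexible-array member of struct aead_edesc,
so container_of() on its first element walks back to the enclosing
allocation; the equivalent pointer arithmetic is:

    /* Equivalent to container_of(desc, struct aead_edesc, hw_desc[0]) */
    struct aead_edesc *edesc = (struct aead_edesc *)
            ((char *)desc - offsetof(struct aead_edesc, hw_desc));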
+
+static void old_aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
+				  void *context)
+{
+	struct aead_request *req = context;
+	struct aead_edesc *edesc;
 #ifdef DEBUG
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
@@ -2198,7 +1863,7 @@ static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	if (err)
 		caam_jr_strstatus(jrdev, err);
 
-	aead_unmap(jrdev, edesc, req);
+	old_aead_unmap(jrdev, edesc, req);
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "assoc  @"__stringify(__LINE__)": ",
@@ -2223,6 +1888,34 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 {
 	struct aead_request *req = context;
 	struct aead_edesc *edesc;
+
+#ifdef DEBUG
+	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
+#endif
+
+	edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
+
+	if (err)
+		caam_jr_strstatus(jrdev, err);
+
+	aead_unmap(jrdev, edesc, req);
+
+	/*
+	 * verify hw auth check passed else return -EBADMSG
+	 */
+	if ((err & JRSTA_CCBERR_ERRID_MASK) == JRSTA_CCBERR_ERRID_ICVCHK)
+		err = -EBADMSG;
+
+	kfree(edesc);
+
+	aead_request_complete(req, err);
+}
+
+static void old_aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
+				  void *context)
+{
+	struct aead_request *req = context;
+	struct aead_edesc *edesc;
 #ifdef DEBUG
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
@@ -2246,7 +1939,7 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	if (err)
 		caam_jr_strstatus(jrdev, err);
 
-	aead_unmap(jrdev, edesc, req);
+	old_aead_unmap(jrdev, edesc, req);
 
 	/*
 	 * verify hw auth check passed else return -EBADMSG
@@ -2342,10 +2035,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 /*
  * Fill in aead job descriptor
  */
-static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
-			  struct aead_edesc *edesc,
-			  struct aead_request *req,
-			  bool all_contig, bool encrypt)
+static void old_init_aead_job(u32 *sh_desc, dma_addr_t ptr,
+			      struct aead_edesc *edesc,
+			      struct aead_request *req,
+			      bool all_contig, bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
@@ -2424,6 +2117,97 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr,
 				   out_options);
 }
 
+/*
+ * Fill in aead job descriptor
+ */
+static void init_aead_job(struct aead_request *req,
+			  struct aead_edesc *edesc,
+			  bool all_contig, bool encrypt)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	int authsize = ctx->authsize;
+	u32 *desc = edesc->hw_desc;
+	u32 out_options, in_options;
+	dma_addr_t dst_dma, src_dma;
+	int len, sec4_sg_index = 0;
+	dma_addr_t ptr;
+	u32 *sh_desc;
+
+	sh_desc = encrypt ? ctx->sh_desc_enc : ctx->sh_desc_dec;
+	ptr = encrypt ? ctx->sh_desc_enc_dma : ctx->sh_desc_dec_dma;
+
+	len = desc_len(sh_desc);
+	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
+
+	if (all_contig) {
+		src_dma = sg_dma_address(req->src);
+		in_options = 0;
+	} else {
+		src_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+		in_options = LDST_SGF;
+	}
+
+	append_seq_in_ptr(desc, src_dma, req->assoclen + req->cryptlen,
+			  in_options);
+
+	dst_dma = src_dma;
+	out_options = in_options;
+
+	if (unlikely(req->src != req->dst)) {
+		if (!edesc->dst_nents) {
+			dst_dma = sg_dma_address(req->dst);
+		} else {
+			dst_dma = edesc->sec4_sg_dma +
+				  sec4_sg_index *
+				  sizeof(struct sec4_sg_entry);
+			out_options = LDST_SGF;
+		}
+	}
+
+	if (encrypt)
+		append_seq_out_ptr(desc, dst_dma,
+				   req->assoclen + req->cryptlen + authsize,
+				   out_options);
+	else
+		append_seq_out_ptr(desc, dst_dma,
+				   req->assoclen + req->cryptlen - authsize,
+				   out_options);
+
+	/* REG3 = assoclen */
+	append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen);
+}
+
+static void init_gcm_job(struct aead_request *req,
+			 struct aead_edesc *edesc,
+			 bool all_contig, bool encrypt)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int ivsize = crypto_aead_ivsize(aead);
+	u32 *desc = edesc->hw_desc;
+	bool generic_gcm = (ivsize == 12);
+	unsigned int last;
+
+	init_aead_job(req, edesc, all_contig, encrypt);
+
+	/* BUG This should not be specific to generic GCM. */
+	last = 0;
+	if (encrypt && generic_gcm && !(req->assoclen + req->cryptlen))
+		last = FIFOLD_TYPE_LAST1;
+
+	/* Read GCM IV */
+	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
+			 FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | 12 | last);
+	/* Append Salt */
+	if (!generic_gcm)
+		append_data(desc, ctx->key + ctx->enckeylen, 4);
+	/* Append IV */
+	append_data(desc, req->iv, ivsize);
+	/* End of blank commands */
+}
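
For the non-generic modes the 12-byte GCM nonce is assembled inside
the descriptor from two pieces: the 4-byte salt stored just past the
encryption key and the caller's 8-byte explicit IV (the RFC 4106
layout). Conceptually (the descriptor feeds this straight into the
CAAM rather than building it in memory) the assembly is:

    u8 nonce[12];

    if (ivsize == 8) {                      /* rfc4106 / rfc4543 */
            memcpy(nonce, ctx->key + ctx->enckeylen, 4);  /* salt */
            memcpy(nonce + 4, req->iv, 8);        /* explicit IV */
    } else {                                /* gcm(aes): ivsize == 12 */
            memcpy(nonce, req->iv, 12);
    }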
+
 /*
  * Fill in aead givencrypt job descriptor
  */
@@ -2608,9 +2392,10 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
 /*
  * allocate and map the aead extended descriptor
  */
-static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
-					   int desc_bytes, bool *all_contig_ptr,
-					   bool encrypt)
+static struct aead_edesc *old_aead_edesc_alloc(struct aead_request *req,
+					       int desc_bytes,
+					       bool *all_contig_ptr,
+					       bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
@@ -2655,35 +2440,138 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 					 DMA_FROM_DEVICE, dst_chained);
 	}
 
-	iv_dma = dma_map_single(jrdev, req->iv, ivsize, DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, iv_dma)) {
-		dev_err(jrdev, "unable to map IV\n");
-		return ERR_PTR(-ENOMEM);
+	iv_dma = dma_map_single(jrdev, req->iv, ivsize, DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, iv_dma)) {
+		dev_err(jrdev, "unable to map IV\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (((ctx->class1_alg_type & OP_ALG_ALGSEL_MASK) ==
+	      OP_ALG_ALGSEL_AES) &&
+	    ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == OP_ALG_AAI_GCM))
+		is_gcm = true;
+
+	/*
+	 * Check if data are contiguous.
+	 * GCM expected input sequence: IV, AAD, text
+	 * All other - expected input sequence: AAD, IV, text
+	 */
+	if (is_gcm)
+		all_contig = (!assoc_nents &&
+			      iv_dma + ivsize == sg_dma_address(req->assoc) &&
+			      !src_nents && sg_dma_address(req->assoc) +
+			      req->assoclen == sg_dma_address(req->src));
+	else
+		all_contig = (!assoc_nents && sg_dma_address(req->assoc) +
+			      req->assoclen == iv_dma && !src_nents &&
+			      iv_dma + ivsize == sg_dma_address(req->src));
+	if (!all_contig) {
+		assoc_nents = assoc_nents ? : 1;
+		src_nents = src_nents ? : 1;
+		sec4_sg_len = assoc_nents + 1 + src_nents;
+	}
+
+	sec4_sg_len += dst_nents;
+
+	sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
+
+	/* allocate space for base edesc and hw desc commands, link tables */
+	edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes +
+			sec4_sg_bytes, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(jrdev, "could not allocate extended descriptor\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	edesc->assoc_nents = assoc_nents;
+	edesc->assoc_chained = assoc_chained;
+	edesc->src_nents = src_nents;
+	edesc->src_chained = src_chained;
+	edesc->dst_nents = dst_nents;
+	edesc->dst_chained = dst_chained;
+	edesc->iv_dma = iv_dma;
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+	edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
+			 desc_bytes;
+	*all_contig_ptr = all_contig;
+
+	sec4_sg_index = 0;
+	if (!all_contig) {
+		if (!is_gcm) {
+			sg_to_sec4_sg_len(req->assoc, req->assoclen,
+					  edesc->sec4_sg + sec4_sg_index);
+			sec4_sg_index += assoc_nents;
+		}
+
+		dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
+				   iv_dma, ivsize, 0);
+		sec4_sg_index += 1;
+
+		if (is_gcm) {
+			sg_to_sec4_sg_len(req->assoc, req->assoclen,
+					  edesc->sec4_sg + sec4_sg_index);
+			sec4_sg_index += assoc_nents;
+		}
+
+		sg_to_sec4_sg_last(req->src,
+				   src_nents,
+				   edesc->sec4_sg +
+				   sec4_sg_index, 0);
+		sec4_sg_index += src_nents;
+	}
+	if (dst_nents) {
+		sg_to_sec4_sg_last(req->dst, dst_nents,
+				   edesc->sec4_sg + sec4_sg_index, 0);
+	}
+	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
+					    sec4_sg_bytes, DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
+		dev_err(jrdev, "unable to map S/G table\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return edesc;
+}
+
+/*
+ * allocate and map the aead extended descriptor
+ */
+static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
+					   int desc_bytes, bool *all_contig_ptr,
+					   bool encrypt)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	int src_nents, dst_nents = 0;
+	struct aead_edesc *edesc;
+	int sgc;
+	bool all_contig = true;
+	bool src_chained = false, dst_chained = false;
+	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
+	unsigned int authsize = ctx->authsize;
+
+	if (unlikely(req->dst != req->src)) {
+		src_nents = sg_count(req->src, req->assoclen + req->cryptlen,
+				     &src_chained);
+		dst_nents = sg_count(req->dst,
+				     req->assoclen + req->cryptlen +
+					(encrypt ? authsize : (-authsize)),
+				     &dst_chained);
+	} else {
+		src_nents = sg_count(req->src,
+				     req->assoclen + req->cryptlen +
+					(encrypt ? authsize : 0),
+				     &src_chained);
 	}
 
-	if (((ctx->class1_alg_type & OP_ALG_ALGSEL_MASK) ==
-	      OP_ALG_ALGSEL_AES) &&
-	    ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == OP_ALG_AAI_GCM))
-		is_gcm = true;
-
-	/*
-	 * Check if data are contiguous.
-	 * GCM expected input sequence: IV, AAD, text
-	 * All other - expected input sequence: AAD, IV, text
-	 */
-	if (is_gcm)
-		all_contig = (!assoc_nents &&
-			      iv_dma + ivsize == sg_dma_address(req->assoc) &&
-			      !src_nents && sg_dma_address(req->assoc) +
-			      req->assoclen == sg_dma_address(req->src));
-	else
-		all_contig = (!assoc_nents && sg_dma_address(req->assoc) +
-			      req->assoclen == iv_dma && !src_nents &&
-			      iv_dma + ivsize == sg_dma_address(req->src));
+	/* Check if data are contiguous. */
+	all_contig = !src_nents;
 	if (!all_contig) {
-		assoc_nents = assoc_nents ? : 1;
 		src_nents = src_nents ? : 1;
-		sec4_sg_len = assoc_nents + 1 + src_nents;
+		sec4_sg_len = src_nents;
 	}
 
 	sec4_sg_len += dst_nents;
@@ -2691,68 +2579,78 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry);
 
 	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kmalloc(sizeof(struct aead_edesc) + desc_bytes +
+	edesc = kzalloc(sizeof(struct aead_edesc) + desc_bytes +
 			sec4_sg_bytes, GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	edesc->assoc_nents = assoc_nents;
-	edesc->assoc_chained = assoc_chained;
+	if (likely(req->src == req->dst)) {
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_BIDIRECTIONAL, src_chained);
+		if (unlikely(!sgc)) {
+			dev_err(jrdev, "unable to map source\n");
+			kfree(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+	} else {
+		sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1,
+					 DMA_TO_DEVICE, src_chained);
+		if (unlikely(!sgc)) {
+			dev_err(jrdev, "unable to map source\n");
+			kfree(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1,
+					 DMA_FROM_DEVICE, dst_chained);
+		if (unlikely(!sgc)) {
+			dev_err(jrdev, "unable to map destination\n");
+			dma_unmap_sg_chained(jrdev, req->src, src_nents ? : 1,
+					     DMA_TO_DEVICE, src_chained);
+			kfree(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
 	edesc->src_nents = src_nents;
 	edesc->src_chained = src_chained;
 	edesc->dst_nents = dst_nents;
 	edesc->dst_chained = dst_chained;
-	edesc->iv_dma = iv_dma;
-	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) +
 			 desc_bytes;
 	*all_contig_ptr = all_contig;
 
 	sec4_sg_index = 0;
 	if (!all_contig) {
-		if (!is_gcm) {
-			sg_to_sec4_sg(req->assoc,
-				      assoc_nents,
-				      edesc->sec4_sg +
-				      sec4_sg_index, 0);
-			sec4_sg_index += assoc_nents;
-		}
-
-		dma_to_sec4_sg_one(edesc->sec4_sg + sec4_sg_index,
-				   iv_dma, ivsize, 0);
-		sec4_sg_index += 1;
-
-		if (is_gcm) {
-			sg_to_sec4_sg(req->assoc,
-				      assoc_nents,
-				      edesc->sec4_sg +
-				      sec4_sg_index, 0);
-			sec4_sg_index += assoc_nents;
-		}
-
-		sg_to_sec4_sg_last(req->src,
-				   src_nents,
-				   edesc->sec4_sg +
-				   sec4_sg_index, 0);
+		sg_to_sec4_sg_last(req->src, src_nents,
+			      edesc->sec4_sg + sec4_sg_index, 0);
 		sec4_sg_index += src_nents;
 	}
 	if (dst_nents) {
 		sg_to_sec4_sg_last(req->dst, dst_nents,
 				   edesc->sec4_sg + sec4_sg_index, 0);
 	}
+
+	if (!sec4_sg_bytes)
+		return edesc;
+
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 		dev_err(jrdev, "unable to map S/G table\n");
+		aead_unmap(jrdev, edesc, req);
+		kfree(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	edesc->sec4_sg_bytes = sec4_sg_bytes;
+
 	return edesc;
 }
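
Note the kmalloc() to kzalloc() switch above: when mapping the S/G
table fails, the error path calls aead_unmap() before sec4_sg_dma and
sec4_sg_bytes have been assigned, so both fields must read as zero.
The byte count effectively doubles as the "table is mapped" flag;
paraphrasing the guard in caam_unmap() that this relies on:

    /* Paraphrased guard, assuming caam_unmap() tests the byte count */
    if (edesc->sec4_sg_bytes)
            dma_unmap_single(dev, edesc->sec4_sg_dma,
                             edesc->sec4_sg_bytes, DMA_TO_DEVICE);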
 
-static int aead_encrypt(struct aead_request *req)
+static int gcm_encrypt(struct aead_request *req)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -2763,14 +2661,12 @@ static int aead_encrypt(struct aead_request *req)
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = aead_edesc_alloc(req, DESC_JOB_IO_LEN *
-				 CAAM_CMD_SZ, &all_contig, true);
+	edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/* Create and submit job descriptor */
-	init_aead_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req,
-		      all_contig, true);
+	init_gcm_job(req, edesc, all_contig, true);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
@@ -2789,7 +2685,79 @@ static int aead_encrypt(struct aead_request *req)
 	return ret;
 }
 
-static int aead_decrypt(struct aead_request *req)
+static int old_aead_encrypt(struct aead_request *req)
+{
+	struct aead_edesc *edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	bool all_contig;
+	u32 *desc;
+	int ret = 0;
+
+	/* allocate extended descriptor */
+	edesc = old_aead_edesc_alloc(req, DESC_JOB_IO_LEN *
+				     CAAM_CMD_SZ, &all_contig, true);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Create and submit job descriptor */
+	old_init_aead_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req,
+			  all_contig, true);
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+		       desc_bytes(edesc->hw_desc), 1);
+#endif
+
+	desc = edesc->hw_desc;
+	ret = caam_jr_enqueue(jrdev, desc, old_aead_encrypt_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		old_aead_unmap(jrdev, edesc, req);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+	struct aead_edesc *edesc;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct device *jrdev = ctx->jrdev;
+	bool all_contig;
+	u32 *desc;
+	int ret = 0;
+
+	/* allocate extended descriptor */
+	edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Create and submit job descriptor */
+	init_gcm_job(req, edesc, all_contig, false);
+#ifdef DEBUG
+	print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
+		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
+		       desc_bytes(edesc->hw_desc), 1);
+#endif
+
+	desc = edesc->hw_desc;
+	ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
+	if (!ret) {
+		ret = -EINPROGRESS;
+	} else {
+		aead_unmap(jrdev, edesc, req);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
+static int old_aead_decrypt(struct aead_request *req)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -2800,8 +2768,8 @@ static int aead_decrypt(struct aead_request *req)
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = aead_edesc_alloc(req, DESC_JOB_IO_LEN *
-				 CAAM_CMD_SZ, &all_contig, false);
+	edesc = old_aead_edesc_alloc(req, DESC_JOB_IO_LEN *
+				     CAAM_CMD_SZ, &all_contig, false);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
@@ -2812,8 +2780,8 @@ static int aead_decrypt(struct aead_request *req)
 #endif
 
 	/* Create and submit job descriptor */
-	init_aead_job(ctx->sh_desc_dec,
-		      ctx->sh_desc_dec_dma, edesc, req, all_contig, false);
+	old_init_aead_job(ctx->sh_desc_dec,
+			  ctx->sh_desc_dec_dma, edesc, req, all_contig, false);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
@@ -2821,11 +2789,11 @@ static int aead_decrypt(struct aead_request *req)
 #endif
 
 	desc = edesc->hw_desc;
-	ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req);
+	ret = caam_jr_enqueue(jrdev, desc, old_aead_decrypt_done, req);
 	if (!ret) {
 		ret = -EINPROGRESS;
 	} else {
-		aead_unmap(jrdev, edesc, req);
+		old_aead_unmap(jrdev, edesc, req);
 		kfree(edesc);
 	}
 
@@ -2953,8 +2921,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
 	sec4_sg_index = 0;
 	if (!(contig & GIV_SRC_CONTIG)) {
 		if (!is_gcm) {
-			sg_to_sec4_sg(req->assoc, assoc_nents,
-				      edesc->sec4_sg + sec4_sg_index, 0);
+			sg_to_sec4_sg_len(req->assoc, req->assoclen,
+					  edesc->sec4_sg + sec4_sg_index);
 			sec4_sg_index += assoc_nents;
 		}
 
@@ -2963,8 +2931,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
 		sec4_sg_index += 1;
 
 		if (is_gcm) {
-			sg_to_sec4_sg(req->assoc, assoc_nents,
-				      edesc->sec4_sg + sec4_sg_index, 0);
+			sg_to_sec4_sg_len(req->assoc, req->assoclen,
+					  edesc->sec4_sg + sec4_sg_index);
 			sec4_sg_index += assoc_nents;
 		}
 
@@ -2999,7 +2967,7 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request
 	return edesc;
 }
 
-static int aead_givencrypt(struct aead_givcrypt_request *areq)
+static int old_aead_givencrypt(struct aead_givcrypt_request *areq)
 {
 	struct aead_request *req = &areq->areq;
 	struct aead_edesc *edesc;
@@ -3033,11 +3001,11 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq)
 #endif
 
 	desc = edesc->hw_desc;
-	ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req);
+	ret = caam_jr_enqueue(jrdev, desc, old_aead_encrypt_done, req);
 	if (!ret) {
 		ret = -EINPROGRESS;
 	} else {
-		aead_unmap(jrdev, edesc, req);
+		old_aead_unmap(jrdev, edesc, req);
 		kfree(edesc);
 	}
 
@@ -3046,7 +3014,7 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq)
 
 static int aead_null_givencrypt(struct aead_givcrypt_request *areq)
 {
-	return aead_encrypt(&areq->areq);
+	return old_aead_encrypt(&areq->areq);
 }
 
 /*
@@ -3379,11 +3347,7 @@ struct caam_alg_template {
 	u32 type;
 	union {
 		struct ablkcipher_alg ablkcipher;
-		struct aead_alg aead;
-		struct blkcipher_alg blkcipher;
-		struct cipher_alg cipher;
-		struct compress_alg compress;
-		struct rng_alg rng;
+		struct old_aead_alg aead;
 	} template_u;
 	u32 class1_alg_type;
 	u32 class2_alg_type;
@@ -3400,8 +3364,8 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
 			.givencrypt = aead_null_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = NULL_IV_SIZE,
@@ -3419,8 +3383,8 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
 			.givencrypt = aead_null_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = NULL_IV_SIZE,
@@ -3438,8 +3402,8 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
 			.givencrypt = aead_null_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = NULL_IV_SIZE,
@@ -3458,8 +3422,8 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
 			.givencrypt = aead_null_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = NULL_IV_SIZE,
@@ -3478,8 +3442,8 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
 			.givencrypt = aead_null_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = NULL_IV_SIZE,
@@ -3498,8 +3462,8 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
 			.givencrypt = aead_null_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = NULL_IV_SIZE,
@@ -3518,9 +3482,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
@@ -3537,9 +3501,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
@@ -3556,9 +3520,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
@@ -3576,9 +3540,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
@@ -3596,9 +3560,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
@@ -3617,9 +3581,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
@@ -3637,9 +3601,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
@@ -3656,9 +3620,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
@@ -3675,9 +3639,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
@@ -3695,9 +3659,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
@@ -3715,9 +3679,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
@@ -3735,9 +3699,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
@@ -3755,9 +3719,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
@@ -3774,9 +3738,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
@@ -3793,9 +3757,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
@@ -3813,9 +3777,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
@@ -3833,9 +3797,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
@@ -3853,9 +3817,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
@@ -3873,9 +3837,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
@@ -3892,9 +3856,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
@@ -3911,9 +3875,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
@@ -3931,9 +3895,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
@@ -3951,9 +3915,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
@@ -3971,9 +3935,9 @@ static struct caam_alg_template driver_algs[] = {
 		.template_aead = {
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
+			.encrypt = old_aead_encrypt,
+			.decrypt = old_aead_decrypt,
+			.givencrypt = old_aead_givencrypt,
 			.geniv = "<built-in>",
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
@@ -3983,58 +3947,6 @@ static struct caam_alg_template driver_algs[] = {
 				   OP_ALG_AAI_HMAC_PRECOMP,
 		.alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
 	},
-	{
-		.name = "rfc4106(gcm(aes))",
-		.driver_name = "rfc4106-gcm-aes-caam",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_AEAD,
-		.template_aead = {
-			.setkey = rfc4106_setkey,
-			.setauthsize = rfc4106_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = 8,
-			.maxauthsize = AES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
-	},
-	{
-		.name = "rfc4543(gcm(aes))",
-		.driver_name = "rfc4543-gcm-aes-caam",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_AEAD,
-		.template_aead = {
-			.setkey = rfc4543_setkey,
-			.setauthsize = rfc4543_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = aead_givencrypt,
-			.geniv = "<built-in>",
-			.ivsize = 8,
-			.maxauthsize = AES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
-	},
-	/* Galois Counter Mode */
-	{
-		.name = "gcm(aes)",
-		.driver_name = "gcm-aes-caam",
-		.blocksize = 1,
-		.type = CRYPTO_ALG_TYPE_AEAD,
-		.template_aead = {
-			.setkey = gcm_setkey,
-			.setauthsize = gcm_setauthsize,
-			.encrypt = aead_encrypt,
-			.decrypt = aead_decrypt,
-			.givencrypt = NULL,
-			.geniv = "<built-in>",
-			.ivsize = 12,
-			.maxauthsize = AES_BLOCK_SIZE,
-			},
-		.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
-	},
 	/* ablkcipher descriptor */
 	{
 		.name = "cbc(aes)",
@@ -4124,21 +4036,84 @@ static struct caam_alg_template driver_algs[] = {
 	}
 };
 
-struct caam_crypto_alg {
-	struct list_head entry;
+struct caam_alg_entry {
 	int class1_alg_type;
 	int class2_alg_type;
 	int alg_op;
+};
+
+struct caam_aead_alg {
+	struct aead_alg aead;
+	struct caam_alg_entry caam;
+	bool registered;
+};
+
+static struct caam_aead_alg driver_aeads[] = {
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4106(gcm(aes))",
+				.cra_driver_name = "rfc4106-gcm-aes-caam",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4106_setkey,
+			.setauthsize = rfc4106_setauthsize,
+			.encrypt = gcm_encrypt,
+			.decrypt = gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	{
+		.aead = {
+			.base = {
+				.cra_name = "rfc4543(gcm(aes))",
+				.cra_driver_name = "rfc4543-gcm-aes-caam",
+				.cra_blocksize = 1,
+			},
+			.setkey = rfc4543_setkey,
+			.setauthsize = rfc4543_setauthsize,
+			.encrypt = gcm_encrypt,
+			.decrypt = gcm_decrypt,
+			.ivsize = 8,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+	/* Galois Counter Mode */
+	{
+		.aead = {
+			.base = {
+				.cra_name = "gcm(aes)",
+				.cra_driver_name = "gcm-aes-caam",
+				.cra_blocksize = 1,
+			},
+			.setkey = gcm_setkey,
+			.setauthsize = gcm_setauthsize,
+			.encrypt = gcm_encrypt,
+			.decrypt = gcm_decrypt,
+			.ivsize = 12,
+			.maxauthsize = AES_BLOCK_SIZE,
+		},
+		.caam = {
+			.class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM,
+		},
+	},
+};
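
driver_aeads[] goes through the new struct aead_alg registration path
(crypto_register_aead() below), so these GCM variants are reached via
the single-scatterlist AEAD API. A sketch of a caller, with
hypothetical key/iv/buf variables and all error handling omitted:

    struct crypto_aead *tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
    struct aead_request *req = aead_request_alloc(tfm, GFP_KERNEL);
    struct scatterlist sg;

    crypto_aead_setkey(tfm, key, 16);
    crypto_aead_setauthsize(tfm, 16);

    /* buf = AD || plaintext || room for the 16-byte tag */
    sg_init_one(&sg, buf, assoclen + cryptlen + 16);
    aead_request_set_crypt(req, &sg, &sg, cryptlen, iv); /* iv: 12 bytes */
    aead_request_set_ad(req, assoclen);
    err = crypto_aead_encrypt(req);  /* async driver: often -EINPROGRESS */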
+
+struct caam_crypto_alg {
 	struct crypto_alg crypto_alg;
+	struct list_head entry;
+	struct caam_alg_entry caam;
 };
 
-static int caam_cra_init(struct crypto_tfm *tfm)
+static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct caam_crypto_alg *caam_alg =
-		 container_of(alg, struct caam_crypto_alg, crypto_alg);
-	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
-
 	ctx->jrdev = caam_jr_alloc();
 	if (IS_ERR(ctx->jrdev)) {
 		pr_err("Job Ring Device allocation for transform failed\n");
@@ -4146,17 +4121,35 @@ static int caam_cra_init(struct crypto_tfm *tfm)
 	}
 
 	/* copy descriptor header template value */
-	ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type;
-	ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam_alg->class2_alg_type;
-	ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_alg->alg_op;
+	ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
+	ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam->class2_alg_type;
+	ctx->alg_op = OP_TYPE_CLASS2_ALG | caam->alg_op;
 
 	return 0;
 }
 
-static void caam_cra_exit(struct crypto_tfm *tfm)
+static int caam_cra_init(struct crypto_tfm *tfm)
 {
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct caam_crypto_alg *caam_alg =
+		 container_of(alg, struct caam_crypto_alg, crypto_alg);
 	struct caam_ctx *ctx = crypto_tfm_ctx(tfm);
 
+	return caam_init_common(ctx, &caam_alg->caam);
+}
+
+static int caam_aead_init(struct crypto_aead *tfm)
+{
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct caam_aead_alg *caam_alg =
+		 container_of(alg, struct caam_aead_alg, aead);
+	struct caam_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return caam_init_common(ctx, &caam_alg->caam);
+}
+
+static void caam_exit_common(struct caam_ctx *ctx)
+{
 	if (ctx->sh_desc_enc_dma &&
 	    !dma_mapping_error(ctx->jrdev, ctx->sh_desc_enc_dma))
 		dma_unmap_single(ctx->jrdev, ctx->sh_desc_enc_dma,
@@ -4179,10 +4172,28 @@ static void caam_cra_exit(struct crypto_tfm *tfm)
 	caam_jr_free(ctx->jrdev);
 }
 
+static void caam_cra_exit(struct crypto_tfm *tfm)
+{
+	caam_exit_common(crypto_tfm_ctx(tfm));
+}
+
+static void caam_aead_exit(struct crypto_aead *tfm)
+{
+	caam_exit_common(crypto_aead_ctx(tfm));
+}
+
 static void __exit caam_algapi_exit(void)
 {
 
 	struct caam_crypto_alg *t_alg, *n;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
+		struct caam_aead_alg *t_alg = driver_aeads + i;
+
+		if (t_alg->registered)
+			crypto_unregister_aead(&t_alg->aead);
+	}
 
 	if (!alg_list.next)
 		return;
@@ -4235,13 +4246,26 @@ static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template
 		break;
 	}
 
-	t_alg->class1_alg_type = template->class1_alg_type;
-	t_alg->class2_alg_type = template->class2_alg_type;
-	t_alg->alg_op = template->alg_op;
+	t_alg->caam.class1_alg_type = template->class1_alg_type;
+	t_alg->caam.class2_alg_type = template->class2_alg_type;
+	t_alg->caam.alg_op = template->alg_op;
 
 	return t_alg;
 }
 
+static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
+{
+	struct aead_alg *alg = &t_alg->aead;
+
+	alg->base.cra_module = THIS_MODULE;
+	alg->base.cra_priority = CAAM_CRA_PRIORITY;
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+
+	alg->init = caam_aead_init;
+	alg->exit = caam_aead_exit;
+}
+
 static int __init caam_algapi_init(void)
 {
 	struct device_node *dev_node;
@@ -4249,6 +4273,7 @@ static int __init caam_algapi_init(void)
 	struct device *ctrldev;
 	void *priv;
 	int i = 0, err = 0;
+	bool registered = false;
 
 	dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
 	if (!dev_node) {
@@ -4295,10 +4320,30 @@ static int __init caam_algapi_init(void)
 			pr_warn("%s alg registration failed\n",
 				t_alg->crypto_alg.cra_driver_name);
 			kfree(t_alg);
-		} else
-			list_add_tail(&t_alg->entry, &alg_list);
+			continue;
+		}
+
+		list_add_tail(&t_alg->entry, &alg_list);
+		registered = true;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
+		struct caam_aead_alg *t_alg = driver_aeads + i;
+
+		caam_aead_alg_init(t_alg);
+
+		err = crypto_register_aead(&t_alg->aead);
+		if (err) {
+			pr_warn("%s alg registration failed\n",
+				t_alg->aead.base.cra_driver_name);
+			continue;
+		}
+
+		t_alg->registered = true;
+		registered = true;
 	}
-	if (!list_empty(&alg_list))
+
+	if (registered)
 		pr_info("caam algorithms registered in /proc/crypto\n");
 
 	return err;

drivers/crypto/caam/caamhash.c (+5 -4)

@@ -835,17 +835,17 @@ static int ahash_update_ctx(struct ahash_request *req)
 			src_map_to_sec4_sg(jrdev, req->src, src_nents,
 					   edesc->sec4_sg + sec4_sg_src_index,
 					   chained);
-			if (*next_buflen) {
+			if (*next_buflen)
 				scatterwalk_map_and_copy(next_buf, req->src,
 							 to_hash - *buflen,
 							 *next_buflen, 0);
-				state->current_buf = !state->current_buf;
-			}
 		} else {
 			(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
 							SEC4_SG_LEN_FIN;
 		}
 
+		state->current_buf = !state->current_buf;
+
 		sh_len = desc_len(sh_desc);
 		desc = edesc->hw_desc;
 		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
@@ -1268,9 +1268,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 			scatterwalk_map_and_copy(next_buf, req->src,
 						 to_hash - *buflen,
 						 *next_buflen, 0);
-			state->current_buf = !state->current_buf;
 		}
 
+		state->current_buf = !state->current_buf;
+
 		sh_len = desc_len(sh_desc);
 		desc = edesc->hw_desc;
 		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
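
These two caamhash hunks move the current_buf toggle out of the
"stage a partial block" branch so the ping-pong between buf_0 and
buf_1 happens on every update pass, even when next_buflen is zero.
The buffer selection that depends on it looks roughly like:

    /* Approximate selection logic in ahash_update_*() */
    u8 *buf      = state->current_buf ? state->buf_1 : state->buf_0;
    u8 *next_buf = state->current_buf ? state->buf_0 : state->buf_1;

    /* ... stage trailing bytes into next_buf, build the job ... */

    state->current_buf = !state->current_buf;  /* now toggled every pass */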

drivers/crypto/caam/compat.h (+1 -1)

@@ -32,7 +32,7 @@
 #include <crypto/des.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/skcipher.h>

drivers/crypto/caam/ctrl.c (+2 -2)

@@ -301,7 +301,7 @@ static int caam_remove(struct platform_device *pdev)
 #endif
 
 	/* Unmap controller region */
-	iounmap(&ctrl);
+	iounmap(ctrl);
 
 	return ret;
 }
@@ -496,7 +496,7 @@ static int caam_probe(struct platform_device *pdev)
 					sizeof(struct platform_device *) * rspec,
 					GFP_KERNEL);
 	if (ctrlpriv->jrpdev == NULL) {
-		iounmap(&ctrl);
+		iounmap(ctrl);
 		return -ENOMEM;
 	}
 

+ 19 - 19
drivers/crypto/caam/regs.h

@@ -83,35 +83,35 @@
 #endif
 #endif
 
+/*
+ * The only user of these wr/rd_reg64 functions is the Job Ring (JR).
+ * The DMA address registers in the JR are a pair of 32-bit registers.
+ * The layout is:
+ *
+ *    base + 0x0000 : most-significant 32 bits
+ *    base + 0x0004 : least-significant 32 bits
+ *
+ * The 32-bit version of this core therefore has to write to base + 0x0004
+ * to set the 32-bit wide DMA address. This seems to be independent of the
+ * endianness of the written/read data.
+ */
+
 #ifndef CONFIG_64BIT
-#ifdef __BIG_ENDIAN
-static inline void wr_reg64(u64 __iomem *reg, u64 data)
-{
-	wr_reg32((u32 __iomem *)reg, (data & 0xffffffff00000000ull) >> 32);
-	wr_reg32((u32 __iomem *)reg + 1, data & 0x00000000ffffffffull);
-}
+#define REG64_MS32(reg) ((u32 __iomem *)(reg))
+#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1)
 
-static inline u64 rd_reg64(u64 __iomem *reg)
-{
-	return (((u64)rd_reg32((u32 __iomem *)reg)) << 32) |
-		((u64)rd_reg32((u32 __iomem *)reg + 1));
-}
-#else
-#ifdef __LITTLE_ENDIAN
 static inline void wr_reg64(u64 __iomem *reg, u64 data)
 {
-	wr_reg32((u32 __iomem *)reg + 1, (data & 0xffffffff00000000ull) >> 32);
-	wr_reg32((u32 __iomem *)reg, data & 0x00000000ffffffffull);
+	wr_reg32(REG64_MS32(reg), data >> 32);
+	wr_reg32(REG64_LS32(reg), data);
 }
 
 static inline u64 rd_reg64(u64 __iomem *reg)
 {
-	return (((u64)rd_reg32((u32 __iomem *)reg + 1)) << 32) |
-		((u64)rd_reg32((u32 __iomem *)reg));
+	return ((u64)rd_reg32(REG64_MS32(reg)) << 32 |
+		(u64)rd_reg32(REG64_LS32(reg)));
 }
 #endif
-#endif
-#endif
 
 /*
  * jr_outentry

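As the new comment explains, each 64-bit JR DMA address register is really a pair of 32-bit registers, with the most-significant word at base + 0x0000 and the least-significant word at base + 0x0004. A userspace sketch of the same split, using a two-word array as a stand-in for the MMIO pair (names are illustrative, not the kernel helpers):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for a JR 64-bit register: regs[0] = most-significant 32 bits
 * (base + 0x0), regs[1] = least-significant 32 bits (base + 0x4). */
static uint32_t regs[2];

static void wr_reg64_sketch(uint32_t *reg, uint64_t data)
{
	reg[0] = (uint32_t)(data >> 32);	/* MS word, base + 0x0 */
	reg[1] = (uint32_t)data;		/* LS word, base + 0x4 */
}

static uint64_t rd_reg64_sketch(const uint32_t *reg)
{
	return ((uint64_t)reg[0] << 32) | reg[1];
}

int main(void)
{
	wr_reg64_sketch(regs, 0x12345678deadbeefULL);
	/* A 32-bit DMA address lands entirely in the LS word, which is
	 * why 32-bit cores write base + 0x0004. */
	printf("ms=%08x ls=%08x rd=%016llx\n", regs[0], regs[1],
	       (unsigned long long)rd_reg64_sketch(regs));
	return 0;
}
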
+ 36 - 14
drivers/crypto/caam/sg_sw_sec4.h

@@ -55,6 +55,21 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
 	sec4_sg_ptr->len |= SEC4_SG_LEN_FIN;
 }
 
+static inline struct sec4_sg_entry *sg_to_sec4_sg_len(
+	struct scatterlist *sg, unsigned int total,
+	struct sec4_sg_entry *sec4_sg_ptr)
+{
+	do {
+		unsigned int len = min(sg_dma_len(sg), total);
+
+		dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), len, 0);
+		sec4_sg_ptr++;
+		sg = sg_next(sg);
+		total -= len;
+	} while (total);
+	return sec4_sg_ptr - 1;
+}
+
 /* count number of elements in scatterlist */
 static inline int __sg_count(struct scatterlist *sg_list, int nbytes,
 			     bool *chained)
@@ -85,34 +100,41 @@ static inline int sg_count(struct scatterlist *sg_list, int nbytes,
 	return sg_nents;
 }
 
-static int dma_map_sg_chained(struct device *dev, struct scatterlist *sg,
-			      unsigned int nents, enum dma_data_direction dir,
-			      bool chained)
+static inline void dma_unmap_sg_chained(
+	struct device *dev, struct scatterlist *sg, unsigned int nents,
+	enum dma_data_direction dir, bool chained)
 {
 	if (unlikely(chained)) {
 		int i;
 		for (i = 0; i < nents; i++) {
-			dma_map_sg(dev, sg, 1, dir);
+			dma_unmap_sg(dev, sg, 1, dir);
 			sg = sg_next(sg);
 		}
-	} else {
-		dma_map_sg(dev, sg, nents, dir);
+	} else if (nents) {
+		dma_unmap_sg(dev, sg, nents, dir);
 	}
-	return nents;
 }
 
-static int dma_unmap_sg_chained(struct device *dev, struct scatterlist *sg,
-				unsigned int nents, enum dma_data_direction dir,
-				bool chained)
+static inline int dma_map_sg_chained(
+	struct device *dev, struct scatterlist *sg, unsigned int nents,
+	enum dma_data_direction dir, bool chained)
 {
+	struct scatterlist *first = sg;
+
 	if (unlikely(chained)) {
 		int i;
 		for (i = 0; i < nents; i++) {
-			dma_unmap_sg(dev, sg, 1, dir);
+			if (!dma_map_sg(dev, sg, 1, dir)) {
+				dma_unmap_sg_chained(dev, first, i, dir,
+						     chained);
+				nents = 0;
+				break;
+			}
+
 			sg = sg_next(sg);
 		}
-	} else {
-		dma_unmap_sg(dev, sg, nents, dir);
-	}
+	} else
+		nents = dma_map_sg(dev, sg, nents, dir);
+
 	return nents;
 }

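The reworked dma_map_sg_chained() maps a chained scatterlist entry by entry and, when one dma_map_sg() call fails, unwinds the entries already mapped before reporting zero mapped entries. A reduced sketch of that unwind-on-partial-failure idiom, with stub map/unmap functions in place of the DMA API:

#include <stdbool.h>
#include <stdio.h>

/* Stub mapping that fails on the chosen entry. */
static int fail_at = 2;

static bool map_one(int idx)
{
	return idx != fail_at;
}

static void unmap_one(int idx)
{
	printf("unmap entry %d\n", idx);
}

/* Map nents entries; on failure, unmap the 0..i-1 already mapped and
 * return 0, matching the dma_map_sg() "0 means failure" convention. */
static int map_chained(int nents)
{
	int i, j;

	for (i = 0; i < nents; i++) {
		if (!map_one(i)) {
			for (j = 0; j < i; j++)
				unmap_one(j);
			return 0;
		}
	}
	return nents;
}

int main(void)
{
	printf("mapped %d entries\n", map_chained(4));
	return 0;
}
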
+ 0 - 1
drivers/crypto/ccp/Kconfig

@@ -13,7 +13,6 @@ config CRYPTO_DEV_CCP_CRYPTO
 	tristate "Encryption and hashing acceleration support"
 	depends on CRYPTO_DEV_CCP_DD
 	default m
-	select CRYPTO_ALGAPI
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AUTHENC

+ 5 - 4
drivers/crypto/ccp/ccp-ops.c

@@ -52,8 +52,7 @@ struct ccp_dm_workarea {
 
 struct ccp_sg_workarea {
 	struct scatterlist *sg;
-	unsigned int nents;
-	unsigned int length;
+	int nents;
 
 	struct scatterlist *dma_sg;
 	struct device *dma_dev;
@@ -496,8 +495,10 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
 	if (!sg)
 		return 0;
 
-	wa->nents = sg_nents(sg);
-	wa->length = sg->length;
+	wa->nents = sg_nents_for_len(sg, len);
+	if (wa->nents < 0)
+		return wa->nents;
+
 	wa->bytes_left = len;
 	wa->sg_used = 0;
 

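Unlike sg_nents(), which merely counts entries, sg_nents_for_len() also checks that the scatterlist actually covers the requested length and returns a negative errno when it is too short; that is why nents becomes signed and is checked above. A sketch of the calling pattern against a stub with the same contract (plain length array instead of a scatterlist):

#include <stdio.h>

#define EINVAL 22

/* Stub with the same contract as sg_nents_for_len(): number of entries
 * needed to cover len bytes, or -EINVAL if the list is too short. */
static int sg_nents_for_len_sketch(const unsigned int *lens, int n,
				   unsigned int len)
{
	int i;

	for (i = 0; i < n; i++) {
		if (lens[i] >= len)
			return i + 1;
		len -= lens[i];
	}
	return -EINVAL;
}

int main(void)
{
	unsigned int lens[] = { 64, 64, 128 };	/* 256 bytes total */
	int nents = sg_nents_for_len_sketch(lens, 3, 300);

	if (nents < 0) {	/* must be checked before use */
		fprintf(stderr, "scatterlist too short: %d\n", nents);
		return 1;
	}
	printf("need %d entries\n", nents);
	return 0;
}
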
+ 0 - 2
drivers/crypto/ccp/ccp-platform.c

@@ -174,8 +174,6 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	}
 	ccp->io_regs = ccp->io_map;
 
-	if (!dev->dma_mask)
-		dev->dma_mask = &dev->coherent_dma_mask;
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
 	if (ret) {
 		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);

+ 5 - 4
drivers/crypto/ixp4xx_crypto.c

@@ -25,7 +25,7 @@
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 #include <crypto/algapi.h>
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
 
@@ -575,7 +575,8 @@ static int init_tfm_ablk(struct crypto_tfm *tfm)
 
 static int init_tfm_aead(struct crypto_tfm *tfm)
 {
-	tfm->crt_aead.reqsize = sizeof(struct aead_ctx);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				sizeof(struct aead_ctx));
 	return init_tfm(tfm);
 }
 
@@ -1096,7 +1097,7 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 {
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
 	u32 *flags = &tfm->base.crt_flags;
-	unsigned digest_len = crypto_aead_alg(tfm)->maxauthsize;
+	unsigned digest_len = crypto_aead_maxauthsize(tfm);
 	int ret;
 
 	if (!ctx->enckey_len && !ctx->authkey_len)
@@ -1138,7 +1139,7 @@ out:
 
 static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 {
-	int max = crypto_aead_alg(tfm)->maxauthsize >> 2;
+	int max = crypto_aead_maxauthsize(tfm) >> 2;
 
 	if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3))
 		return -EINVAL;

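The setauthsize check above accepts only ICV lengths that are nonzero multiples of 4 bytes and no larger than the algorithm's maxauthsize (the shifts simply work in 32-bit words). The same predicate in isolation, assuming nothing beyond plain integers:

#include <stdbool.h>
#include <stdio.h>

/* Accept authsize iff it is a nonzero multiple of 4 and at most max. */
static bool authsize_ok(unsigned int authsize, unsigned int maxauthsize)
{
	unsigned int max = maxauthsize >> 2;

	return !((authsize >> 2) < 1 || (authsize >> 2) > max ||
		 (authsize & 3));
}

int main(void)
{
	printf("%d %d %d\n",
	       authsize_ok(12, 16),	/* 1: ok            */
	       authsize_ok(10, 16),	/* 0: not a multiple of 4 */
	       authsize_ok(20, 16));	/* 0: exceeds max   */
	return 0;
}
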
+ 2 - 0
drivers/crypto/marvell/Makefile

@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o
+marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o

+ 548 - 0
drivers/crypto/marvell/cesa.c

@@ -0,0 +1,548 @@
+/*
+ * Support for Marvell's Cryptographic Engine and Security Accelerator (CESA)
+ * that can be found on the following platforms: Orion, Kirkwood, Armada. This
+ * driver supports the TDMA engine on platforms on which it is available.
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ * Author: Arnaud Ebalard <arno@natisbad.org>
+ *
+ * This work is based on an initial version written by
+ * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc >
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/genalloc.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/mbus.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+
+#include "cesa.h"
+
+static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA);
+module_param_named(allhwsupport, allhwsupport, int, 0444);
+MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even if it overlaps with the mv_cesa driver)");
+
+struct mv_cesa_dev *cesa_dev;
+
+static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine)
+{
+	struct crypto_async_request *req, *backlog;
+	struct mv_cesa_ctx *ctx;
+
+	spin_lock_bh(&cesa_dev->lock);
+	backlog = crypto_get_backlog(&cesa_dev->queue);
+	req = crypto_dequeue_request(&cesa_dev->queue);
+	engine->req = req;
+	spin_unlock_bh(&cesa_dev->lock);
+
+	if (!req)
+		return;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	ctx = crypto_tfm_ctx(req->tfm);
+	ctx->ops->prepare(req, engine);
+	ctx->ops->step(req);
+}
+
+static irqreturn_t mv_cesa_int(int irq, void *priv)
+{
+	struct mv_cesa_engine *engine = priv;
+	struct crypto_async_request *req;
+	struct mv_cesa_ctx *ctx;
+	u32 status, mask;
+	irqreturn_t ret = IRQ_NONE;
+
+	while (true) {
+		int res;
+
+		mask = mv_cesa_get_int_mask(engine);
+		status = readl(engine->regs + CESA_SA_INT_STATUS);
+
+		if (!(status & mask))
+			break;
+
+		/*
+		 * TODO: avoid clearing the FPGA_INT_STATUS if this is not
+		 * relevant on some platforms.
+		 */
+		writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS);
+		writel(~status, engine->regs + CESA_SA_INT_STATUS);
+
+		ret = IRQ_HANDLED;
+		spin_lock_bh(&engine->lock);
+		req = engine->req;
+		spin_unlock_bh(&engine->lock);
+		if (req) {
+			ctx = crypto_tfm_ctx(req->tfm);
+			res = ctx->ops->process(req, status & mask);
+			if (res != -EINPROGRESS) {
+				spin_lock_bh(&engine->lock);
+				engine->req = NULL;
+				mv_cesa_dequeue_req_unlocked(engine);
+				spin_unlock_bh(&engine->lock);
+				ctx->ops->cleanup(req);
+				local_bh_disable();
+				req->complete(req, res);
+				local_bh_enable();
+			} else {
+				ctx->ops->step(req);
+			}
+		}
+	}
+
+	return ret;
+}
+
+int mv_cesa_queue_req(struct crypto_async_request *req)
+{
+	int ret;
+	int i;
+
+	spin_lock_bh(&cesa_dev->lock);
+	ret = crypto_enqueue_request(&cesa_dev->queue, req);
+	spin_unlock_bh(&cesa_dev->lock);
+
+	if (ret != -EINPROGRESS)
+		return ret;
+
+	for (i = 0; i < cesa_dev->caps->nengines; i++) {
+		spin_lock_bh(&cesa_dev->engines[i].lock);
+		if (!cesa_dev->engines[i].req)
+			mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]);
+		spin_unlock_bh(&cesa_dev->engines[i].lock);
+	}
+
+	return -EINPROGRESS;
+}
+
+static int mv_cesa_add_algs(struct mv_cesa_dev *cesa)
+{
+	int ret;
+	int i, j;
+
+	for (i = 0; i < cesa->caps->ncipher_algs; i++) {
+		ret = crypto_register_alg(cesa->caps->cipher_algs[i]);
+		if (ret)
+			goto err_unregister_crypto;
+	}
+
+	for (i = 0; i < cesa->caps->nahash_algs; i++) {
+		ret = crypto_register_ahash(cesa->caps->ahash_algs[i]);
+		if (ret)
+			goto err_unregister_ahash;
+	}
+
+	return 0;
+
+err_unregister_ahash:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(cesa->caps->ahash_algs[j]);
+	i = cesa->caps->ncipher_algs;
+
+err_unregister_crypto:
+	for (j = 0; j < i; j++)
+		crypto_unregister_alg(cesa->caps->cipher_algs[j]);
+
+	return ret;
+}
+
+static void mv_cesa_remove_algs(struct mv_cesa_dev *cesa)
+{
+	int i;
+
+	for (i = 0; i < cesa->caps->nahash_algs; i++)
+		crypto_unregister_ahash(cesa->caps->ahash_algs[i]);
+
+	for (i = 0; i < cesa->caps->ncipher_algs; i++)
+		crypto_unregister_alg(cesa->caps->cipher_algs[i]);
+}
+
+static struct crypto_alg *orion_cipher_algs[] = {
+	&mv_cesa_ecb_des_alg,
+	&mv_cesa_cbc_des_alg,
+	&mv_cesa_ecb_des3_ede_alg,
+	&mv_cesa_cbc_des3_ede_alg,
+	&mv_cesa_ecb_aes_alg,
+	&mv_cesa_cbc_aes_alg,
+};
+
+static struct ahash_alg *orion_ahash_algs[] = {
+	&mv_md5_alg,
+	&mv_sha1_alg,
+	&mv_ahmac_md5_alg,
+	&mv_ahmac_sha1_alg,
+};
+
+static struct crypto_alg *armada_370_cipher_algs[] = {
+	&mv_cesa_ecb_des_alg,
+	&mv_cesa_cbc_des_alg,
+	&mv_cesa_ecb_des3_ede_alg,
+	&mv_cesa_cbc_des3_ede_alg,
+	&mv_cesa_ecb_aes_alg,
+	&mv_cesa_cbc_aes_alg,
+};
+
+static struct ahash_alg *armada_370_ahash_algs[] = {
+	&mv_md5_alg,
+	&mv_sha1_alg,
+	&mv_sha256_alg,
+	&mv_ahmac_md5_alg,
+	&mv_ahmac_sha1_alg,
+	&mv_ahmac_sha256_alg,
+};
+
+static const struct mv_cesa_caps orion_caps = {
+	.nengines = 1,
+	.cipher_algs = orion_cipher_algs,
+	.ncipher_algs = ARRAY_SIZE(orion_cipher_algs),
+	.ahash_algs = orion_ahash_algs,
+	.nahash_algs = ARRAY_SIZE(orion_ahash_algs),
+	.has_tdma = false,
+};
+
+static const struct mv_cesa_caps kirkwood_caps = {
+	.nengines = 1,
+	.cipher_algs = orion_cipher_algs,
+	.ncipher_algs = ARRAY_SIZE(orion_cipher_algs),
+	.ahash_algs = orion_ahash_algs,
+	.nahash_algs = ARRAY_SIZE(orion_ahash_algs),
+	.has_tdma = true,
+};
+
+static const struct mv_cesa_caps armada_370_caps = {
+	.nengines = 1,
+	.cipher_algs = armada_370_cipher_algs,
+	.ncipher_algs = ARRAY_SIZE(armada_370_cipher_algs),
+	.ahash_algs = armada_370_ahash_algs,
+	.nahash_algs = ARRAY_SIZE(armada_370_ahash_algs),
+	.has_tdma = true,
+};
+
+static const struct mv_cesa_caps armada_xp_caps = {
+	.nengines = 2,
+	.cipher_algs = armada_370_cipher_algs,
+	.ncipher_algs = ARRAY_SIZE(armada_370_cipher_algs),
+	.ahash_algs = armada_370_ahash_algs,
+	.nahash_algs = ARRAY_SIZE(armada_370_ahash_algs),
+	.has_tdma = true,
+};
+
+static const struct of_device_id mv_cesa_of_match_table[] = {
+	{ .compatible = "marvell,orion-crypto", .data = &orion_caps },
+	{ .compatible = "marvell,kirkwood-crypto", .data = &kirkwood_caps },
+	{ .compatible = "marvell,dove-crypto", .data = &kirkwood_caps },
+	{ .compatible = "marvell,armada-370-crypto", .data = &armada_370_caps },
+	{ .compatible = "marvell,armada-xp-crypto", .data = &armada_xp_caps },
+	{ .compatible = "marvell,armada-375-crypto", .data = &armada_xp_caps },
+	{ .compatible = "marvell,armada-38x-crypto", .data = &armada_xp_caps },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mv_cesa_of_match_table);
+
+static void
+mv_cesa_conf_mbus_windows(struct mv_cesa_engine *engine,
+			  const struct mbus_dram_target_info *dram)
+{
+	void __iomem *iobase = engine->regs;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		writel(0, iobase + CESA_TDMA_WINDOW_CTRL(i));
+		writel(0, iobase + CESA_TDMA_WINDOW_BASE(i));
+	}
+
+	for (i = 0; i < dram->num_cs; i++) {
+		const struct mbus_dram_window *cs = dram->cs + i;
+
+		writel(((cs->size - 1) & 0xffff0000) |
+		       (cs->mbus_attr << 8) |
+		       (dram->mbus_dram_target_id << 4) | 1,
+		       iobase + CESA_TDMA_WINDOW_CTRL(i));
+		writel(cs->base, iobase + CESA_TDMA_WINDOW_BASE(i));
+	}
+}
+
+static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
+{
+	struct device *dev = cesa->dev;
+	struct mv_cesa_dev_dma *dma;
+
+	if (!cesa->caps->has_tdma)
+		return 0;
+
+	dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
+	if (!dma)
+		return -ENOMEM;
+
+	dma->tdma_desc_pool = dmam_pool_create("tdma_desc", dev,
+					sizeof(struct mv_cesa_tdma_desc),
+					16, 0);
+	if (!dma->tdma_desc_pool)
+		return -ENOMEM;
+
+	dma->op_pool = dmam_pool_create("cesa_op", dev,
+					sizeof(struct mv_cesa_op_ctx), 16, 0);
+	if (!dma->op_pool)
+		return -ENOMEM;
+
+	dma->cache_pool = dmam_pool_create("cesa_cache", dev,
+					   CESA_MAX_HASH_BLOCK_SIZE, 1, 0);
+	if (!dma->cache_pool)
+		return -ENOMEM;
+
+	dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0);
+	if (!dma->padding_pool)
+		return -ENOMEM;
+
+	cesa->dma = dma;
+
+	return 0;
+}
+
+static int mv_cesa_get_sram(struct platform_device *pdev, int idx)
+{
+	struct mv_cesa_dev *cesa = platform_get_drvdata(pdev);
+	struct mv_cesa_engine *engine = &cesa->engines[idx];
+	const char *res_name = "sram";
+	struct resource *res;
+
+	engine->pool = of_get_named_gen_pool(cesa->dev->of_node,
+					     "marvell,crypto-srams",
+					     idx);
+	if (engine->pool) {
+		engine->sram = gen_pool_dma_alloc(engine->pool,
+						  cesa->sram_size,
+						  &engine->sram_dma);
+		if (engine->sram)
+			return 0;
+
+		engine->pool = NULL;
+		return -ENOMEM;
+	}
+
+	if (cesa->caps->nengines > 1) {
+		if (!idx)
+			res_name = "sram0";
+		else
+			res_name = "sram1";
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   res_name);
+	if (!res || resource_size(res) < cesa->sram_size)
+		return -EINVAL;
+
+	engine->sram = devm_ioremap_resource(cesa->dev, res);
+	if (IS_ERR(engine->sram))
+		return PTR_ERR(engine->sram);
+
+	engine->sram_dma = phys_to_dma(cesa->dev,
+				       (phys_addr_t)res->start);
+
+	return 0;
+}
+
+static void mv_cesa_put_sram(struct platform_device *pdev, int idx)
+{
+	struct mv_cesa_dev *cesa = platform_get_drvdata(pdev);
+	struct mv_cesa_engine *engine = &cesa->engines[idx];
+
+	if (!engine->pool)
+		return;
+
+	gen_pool_free(engine->pool, (unsigned long)engine->sram,
+		      cesa->sram_size);
+}
+
+static int mv_cesa_probe(struct platform_device *pdev)
+{
+	const struct mv_cesa_caps *caps = &orion_caps;
+	const struct mbus_dram_target_info *dram;
+	const struct of_device_id *match;
+	struct device *dev = &pdev->dev;
+	struct mv_cesa_dev *cesa;
+	struct mv_cesa_engine *engines;
+	struct resource *res;
+	int irq, ret, i;
+	u32 sram_size;
+
+	if (cesa_dev) {
+		dev_err(&pdev->dev, "Only one CESA device authorized\n");
+		return -EEXIST;
+	}
+
+	if (dev->of_node) {
+		match = of_match_node(mv_cesa_of_match_table, dev->of_node);
+		if (!match || !match->data)
+			return -ENOTSUPP;
+
+		caps = match->data;
+	}
+
+	if ((caps == &orion_caps || caps == &kirkwood_caps) && !allhwsupport)
+		return -ENOTSUPP;
+
+	cesa = devm_kzalloc(dev, sizeof(*cesa), GFP_KERNEL);
+	if (!cesa)
+		return -ENOMEM;
+
+	cesa->caps = caps;
+	cesa->dev = dev;
+
+	sram_size = CESA_SA_DEFAULT_SRAM_SIZE;
+	of_property_read_u32(cesa->dev->of_node, "marvell,crypto-sram-size",
+			     &sram_size);
+	if (sram_size < CESA_SA_MIN_SRAM_SIZE)
+		sram_size = CESA_SA_MIN_SRAM_SIZE;
+
+	cesa->sram_size = sram_size;
+	cesa->engines = devm_kzalloc(dev, caps->nengines * sizeof(*engines),
+				     GFP_KERNEL);
+	if (!cesa->engines)
+		return -ENOMEM;
+
+	spin_lock_init(&cesa->lock);
+	crypto_init_queue(&cesa->queue, 50);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
+	cesa->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(cesa->regs))
+		return PTR_ERR(cesa->regs);
+
+	ret = mv_cesa_dev_dma_init(cesa);
+	if (ret)
+		return ret;
+
+	dram = mv_mbus_dram_info_nooverlap();
+
+	platform_set_drvdata(pdev, cesa);
+
+	for (i = 0; i < caps->nengines; i++) {
+		struct mv_cesa_engine *engine = &cesa->engines[i];
+		char res_name[7];
+
+		engine->id = i;
+		spin_lock_init(&engine->lock);
+
+		ret = mv_cesa_get_sram(pdev, i);
+		if (ret)
+			goto err_cleanup;
+
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = irq;
+			goto err_cleanup;
+		}
+
+		/*
+		 * Not all platforms can gate the CESA clocks: do not complain
+		 * if the clock does not exist.
+		 */
+		snprintf(res_name, sizeof(res_name), "cesa%d", i);
+		engine->clk = devm_clk_get(dev, res_name);
+		if (IS_ERR(engine->clk)) {
+			engine->clk = devm_clk_get(dev, NULL);
+			if (IS_ERR(engine->clk))
+				engine->clk = NULL;
+		}
+
+		snprintf(res_name, sizeof(res_name), "cesaz%d", i);
+		engine->zclk = devm_clk_get(dev, res_name);
+		if (IS_ERR(engine->zclk))
+			engine->zclk = NULL;
+
+		ret = clk_prepare_enable(engine->clk);
+		if (ret)
+			goto err_cleanup;
+
+		ret = clk_prepare_enable(engine->zclk);
+		if (ret)
+			goto err_cleanup;
+
+		engine->regs = cesa->regs + CESA_ENGINE_OFF(i);
+
+		if (dram && cesa->caps->has_tdma)
+			mv_cesa_conf_mbus_windows(&cesa->engines[i], dram);
+
+		writel(0, cesa->engines[i].regs + CESA_SA_INT_STATUS);
+		writel(CESA_SA_CFG_STOP_DIG_ERR,
+		       cesa->engines[i].regs + CESA_SA_CFG);
+		writel(engine->sram_dma & CESA_SA_SRAM_MSK,
+		       cesa->engines[i].regs + CESA_SA_DESC_P0);
+
+		ret = devm_request_threaded_irq(dev, irq, NULL, mv_cesa_int,
+						IRQF_ONESHOT,
+						dev_name(&pdev->dev),
+						&cesa->engines[i]);
+		if (ret)
+			goto err_cleanup;
+	}
+
+	cesa_dev = cesa;
+
+	ret = mv_cesa_add_algs(cesa);
+	if (ret) {
+		cesa_dev = NULL;
+		goto err_cleanup;
+	}
+
+	dev_info(dev, "CESA device successfully registered\n");
+
+	return 0;
+
+err_cleanup:
+	for (i = 0; i < caps->nengines; i++) {
+		clk_disable_unprepare(cesa->engines[i].zclk);
+		clk_disable_unprepare(cesa->engines[i].clk);
+		mv_cesa_put_sram(pdev, i);
+	}
+
+	return ret;
+}
+
+static int mv_cesa_remove(struct platform_device *pdev)
+{
+	struct mv_cesa_dev *cesa = platform_get_drvdata(pdev);
+	int i;
+
+	mv_cesa_remove_algs(cesa);
+
+	for (i = 0; i < cesa->caps->nengines; i++) {
+		clk_disable_unprepare(cesa->engines[i].zclk);
+		clk_disable_unprepare(cesa->engines[i].clk);
+		mv_cesa_put_sram(pdev, i);
+	}
+
+	return 0;
+}
+
+static struct platform_driver marvell_cesa = {
+	.probe		= mv_cesa_probe,
+	.remove		= mv_cesa_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "marvell-cesa",
+		.of_match_table = mv_cesa_of_match_table,
+	},
+};
+module_platform_driver(marvell_cesa);
+
+MODULE_ALIAS("platform:mv_crypto");
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>");
+MODULE_AUTHOR("Arnaud Ebalard <arno@natisbad.org>");
+MODULE_DESCRIPTION("Support for Marvell's cryptographic engine");
+MODULE_LICENSE("GPL v2");

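The driver keeps at most one request in flight per engine: mv_cesa_queue_req() enqueues a request and kicks any idle engine, and mv_cesa_int() dequeues the next pending request once the current one completes. (The allhwsupport parameter above lets the module claim Orion/Kirkwood engines even when the legacy mv_cesa driver is enabled.) A reduced, single-threaded sketch of that one-slot dispatch loop, with plain integers standing in for requests and no locking:

#include <stdio.h>

#define QLEN 4

static int queue[QLEN] = { 1, 2, 3 };	/* pending request ids */
static int head, tail = 3;
static int engine_req;			/* 0 means the engine is idle */

/* Stand-in for mv_cesa_dequeue_req_unlocked(): pick up the next
 * pending request, if any, and start it. */
static void dequeue_and_start(void)
{
	engine_req = (head == tail) ? 0 : queue[head++];
	if (engine_req)
		printf("start req %d\n", engine_req);
}

/* Stand-in for mv_cesa_int(): completion of the current request. */
static void irq_complete(void)
{
	printf("done  req %d\n", engine_req);
	dequeue_and_start();
}

int main(void)
{
	dequeue_and_start();	/* initial kick, as mv_cesa_queue_req() */
	while (engine_req)
		irq_complete();
	return 0;
}
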
+ 791 - 0
drivers/crypto/marvell/cesa.h

@@ -0,0 +1,791 @@
+#ifndef __MARVELL_CESA_H__
+#define __MARVELL_CESA_H__
+
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/crypto.h>
+#include <linux/dmapool.h>
+
+#define CESA_ENGINE_OFF(i)			(((i) * 0x2000))
+
+#define CESA_TDMA_BYTE_CNT			0x800
+#define CESA_TDMA_SRC_ADDR			0x810
+#define CESA_TDMA_DST_ADDR			0x820
+#define CESA_TDMA_NEXT_ADDR			0x830
+
+#define CESA_TDMA_CONTROL			0x840
+#define CESA_TDMA_DST_BURST			GENMASK(2, 0)
+#define CESA_TDMA_DST_BURST_32B			3
+#define CESA_TDMA_DST_BURST_128B		4
+#define CESA_TDMA_OUT_RD_EN			BIT(4)
+#define CESA_TDMA_SRC_BURST			GENMASK(8, 6)
+#define CESA_TDMA_SRC_BURST_32B			(3 << 6)
+#define CESA_TDMA_SRC_BURST_128B		(4 << 6)
+#define CESA_TDMA_CHAIN				BIT(9)
+#define CESA_TDMA_BYTE_SWAP			BIT(11)
+#define CESA_TDMA_NO_BYTE_SWAP			BIT(11)
+#define CESA_TDMA_EN				BIT(12)
+#define CESA_TDMA_FETCH_ND			BIT(13)
+#define CESA_TDMA_ACT				BIT(14)
+
+#define CESA_TDMA_CUR				0x870
+#define CESA_TDMA_ERROR_CAUSE			0x8c8
+#define CESA_TDMA_ERROR_MSK			0x8cc
+
+#define CESA_TDMA_WINDOW_BASE(x)		(((x) * 0x8) + 0xa00)
+#define CESA_TDMA_WINDOW_CTRL(x)		(((x) * 0x8) + 0xa04)
+
+#define CESA_IVDIG(x)				(0xdd00 + ((x) * 4) +	\
+						 (((x) < 5) ? 0 : 0x14))
+
+#define CESA_SA_CMD				0xde00
+#define CESA_SA_CMD_EN_CESA_SA_ACCL0		BIT(0)
+#define CESA_SA_CMD_EN_CESA_SA_ACCL1		BIT(1)
+#define CESA_SA_CMD_DISABLE_SEC			BIT(2)
+
+#define CESA_SA_DESC_P0				0xde04
+
+#define CESA_SA_DESC_P1				0xde14
+
+#define CESA_SA_CFG				0xde08
+#define CESA_SA_CFG_STOP_DIG_ERR		GENMASK(1, 0)
+#define CESA_SA_CFG_DIG_ERR_CONT		0
+#define CESA_SA_CFG_DIG_ERR_SKIP		1
+#define CESA_SA_CFG_DIG_ERR_STOP		3
+#define CESA_SA_CFG_CH0_W_IDMA			BIT(7)
+#define CESA_SA_CFG_CH1_W_IDMA			BIT(8)
+#define CESA_SA_CFG_ACT_CH0_IDMA		BIT(9)
+#define CESA_SA_CFG_ACT_CH1_IDMA		BIT(10)
+#define CESA_SA_CFG_MULTI_PKT			BIT(11)
+#define CESA_SA_CFG_PARA_DIS			BIT(13)
+
+#define CESA_SA_ACCEL_STATUS			0xde0c
+#define CESA_SA_ST_ACT_0			BIT(0)
+#define CESA_SA_ST_ACT_1			BIT(1)
+
+/*
+ * CESA_SA_FPGA_INT_STATUS looks like an FPGA leftover and is documented only
+ * in Errata 4.12. It appears to have been part of an IRQ controller in the
+ * FPGA, and someone forgot to remove it when switching to the core and moving
+ * to CESA_SA_INT_STATUS.
+ */
+#define CESA_SA_FPGA_INT_STATUS			0xdd68
+#define CESA_SA_INT_STATUS			0xde20
+#define CESA_SA_INT_AUTH_DONE			BIT(0)
+#define CESA_SA_INT_DES_E_DONE			BIT(1)
+#define CESA_SA_INT_AES_E_DONE			BIT(2)
+#define CESA_SA_INT_AES_D_DONE			BIT(3)
+#define CESA_SA_INT_ENC_DONE			BIT(4)
+#define CESA_SA_INT_ACCEL0_DONE			BIT(5)
+#define CESA_SA_INT_ACCEL1_DONE			BIT(6)
+#define CESA_SA_INT_ACC0_IDMA_DONE		BIT(7)
+#define CESA_SA_INT_ACC1_IDMA_DONE		BIT(8)
+#define CESA_SA_INT_IDMA_DONE			BIT(9)
+#define CESA_SA_INT_IDMA_OWN_ERR		BIT(10)
+
+#define CESA_SA_INT_MSK				0xde24
+
+#define CESA_SA_DESC_CFG_OP_MAC_ONLY		0
+#define CESA_SA_DESC_CFG_OP_CRYPT_ONLY		1
+#define CESA_SA_DESC_CFG_OP_MAC_CRYPT		2
+#define CESA_SA_DESC_CFG_OP_CRYPT_MAC		3
+#define CESA_SA_DESC_CFG_OP_MSK			GENMASK(1, 0)
+#define CESA_SA_DESC_CFG_MACM_SHA256		(1 << 4)
+#define CESA_SA_DESC_CFG_MACM_HMAC_SHA256	(3 << 4)
+#define CESA_SA_DESC_CFG_MACM_MD5		(4 << 4)
+#define CESA_SA_DESC_CFG_MACM_SHA1		(5 << 4)
+#define CESA_SA_DESC_CFG_MACM_HMAC_MD5		(6 << 4)
+#define CESA_SA_DESC_CFG_MACM_HMAC_SHA1		(7 << 4)
+#define CESA_SA_DESC_CFG_MACM_MSK		GENMASK(6, 4)
+#define CESA_SA_DESC_CFG_CRYPTM_DES		(1 << 8)
+#define CESA_SA_DESC_CFG_CRYPTM_3DES		(2 << 8)
+#define CESA_SA_DESC_CFG_CRYPTM_AES		(3 << 8)
+#define CESA_SA_DESC_CFG_CRYPTM_MSK		GENMASK(9, 8)
+#define CESA_SA_DESC_CFG_DIR_ENC		(0 << 12)
+#define CESA_SA_DESC_CFG_DIR_DEC		(1 << 12)
+#define CESA_SA_DESC_CFG_CRYPTCM_ECB		(0 << 16)
+#define CESA_SA_DESC_CFG_CRYPTCM_CBC		(1 << 16)
+#define CESA_SA_DESC_CFG_CRYPTCM_MSK		BIT(16)
+#define CESA_SA_DESC_CFG_3DES_EEE		(0 << 20)
+#define CESA_SA_DESC_CFG_3DES_EDE		(1 << 20)
+#define CESA_SA_DESC_CFG_AES_LEN_128		(0 << 24)
+#define CESA_SA_DESC_CFG_AES_LEN_192		(1 << 24)
+#define CESA_SA_DESC_CFG_AES_LEN_256		(2 << 24)
+#define CESA_SA_DESC_CFG_AES_LEN_MSK		GENMASK(25, 24)
+#define CESA_SA_DESC_CFG_NOT_FRAG		(0 << 30)
+#define CESA_SA_DESC_CFG_FIRST_FRAG		(1 << 30)
+#define CESA_SA_DESC_CFG_LAST_FRAG		(2 << 30)
+#define CESA_SA_DESC_CFG_MID_FRAG		(3 << 30)
+#define CESA_SA_DESC_CFG_FRAG_MSK		GENMASK(31, 30)
+
+/*
+ * /-----------\ 0
+ * | ACCEL CFG |	4 * 8
+ * |-----------| 0x20
+ * | CRYPT KEY |	8 * 4
+ * |-----------| 0x40
+ * |  IV   IN  |	4 * 4
+ * |-----------| 0x40 (inplace)
+ * |  IV BUF   |	4 * 4
+ * |-----------| 0x80
+ * |  DATA IN  |	16 * x (max ->max_req_size)
+ * |-----------| 0x80 (inplace operation)
+ * |  DATA OUT |	16 * x (max ->max_req_size)
+ * \-----------/ SRAM size
+ */
+
+/*
+ * Hashing memory map:
+ * /-----------\ 0
+ * | ACCEL CFG |        4 * 8
+ * |-----------| 0x20
+ * | Inner IV  |        8 * 4
+ * |-----------| 0x40
+ * | Outer IV  |        8 * 4
+ * |-----------| 0x60
+ * | Output BUF|        8 * 4
+ * |-----------| 0x80
+ * |  DATA IN  |        64 * x (max ->max_req_size)
+ * \-----------/ SRAM size
+ */
+
+#define CESA_SA_CFG_SRAM_OFFSET			0x00
+#define CESA_SA_DATA_SRAM_OFFSET		0x80
+
+#define CESA_SA_CRYPT_KEY_SRAM_OFFSET		0x20
+#define CESA_SA_CRYPT_IV_SRAM_OFFSET		0x40
+
+#define CESA_SA_MAC_IIV_SRAM_OFFSET		0x20
+#define CESA_SA_MAC_OIV_SRAM_OFFSET		0x40
+#define CESA_SA_MAC_DIG_SRAM_OFFSET		0x60
+
+#define CESA_SA_DESC_CRYPT_DATA(offset)					\
+	cpu_to_le32((CESA_SA_DATA_SRAM_OFFSET + (offset)) |		\
+		    ((CESA_SA_DATA_SRAM_OFFSET + (offset)) << 16))
+
+#define CESA_SA_DESC_CRYPT_IV(offset)					\
+	cpu_to_le32((CESA_SA_CRYPT_IV_SRAM_OFFSET + (offset)) |	\
+		    ((CESA_SA_CRYPT_IV_SRAM_OFFSET + (offset)) << 16))
+
+#define CESA_SA_DESC_CRYPT_KEY(offset)					\
+	cpu_to_le32(CESA_SA_CRYPT_KEY_SRAM_OFFSET + (offset))
+
+#define CESA_SA_DESC_MAC_DATA(offset)					\
+	cpu_to_le32(CESA_SA_DATA_SRAM_OFFSET + (offset))
+#define CESA_SA_DESC_MAC_DATA_MSK		GENMASK(15, 0)
+
+#define CESA_SA_DESC_MAC_TOTAL_LEN(total_len)	cpu_to_le32((total_len) << 16)
+#define CESA_SA_DESC_MAC_TOTAL_LEN_MSK		GENMASK(31, 16)
+
+#define CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX	0xffff
+
+#define CESA_SA_DESC_MAC_DIGEST(offset)					\
+	cpu_to_le32(CESA_SA_MAC_DIG_SRAM_OFFSET + (offset))
+#define CESA_SA_DESC_MAC_DIGEST_MSK		GENMASK(15, 0)
+
+#define CESA_SA_DESC_MAC_FRAG_LEN(frag_len)	cpu_to_le32((frag_len) << 16)
+#define CESA_SA_DESC_MAC_FRAG_LEN_MSK		GENMASK(31, 16)
+
+#define CESA_SA_DESC_MAC_IV(offset)					\
+	cpu_to_le32((CESA_SA_MAC_IIV_SRAM_OFFSET + (offset)) |		\
+		    ((CESA_SA_MAC_OIV_SRAM_OFFSET + (offset)) << 16))
+
+#define CESA_SA_SRAM_SIZE			2048
+#define CESA_SA_SRAM_PAYLOAD_SIZE		(cesa_dev->sram_size - \
+						 CESA_SA_DATA_SRAM_OFFSET)
+
+#define CESA_SA_DEFAULT_SRAM_SIZE		2048
+#define CESA_SA_MIN_SRAM_SIZE			1024
+
+#define CESA_SA_SRAM_MSK			(2048 - 1)
+
+#define CESA_MAX_HASH_BLOCK_SIZE		64
+#define CESA_HASH_BLOCK_SIZE_MSK		(CESA_MAX_HASH_BLOCK_SIZE - 1)
+
+/**
+ * struct mv_cesa_sec_accel_desc - security accelerator descriptor
+ * @config:	engine config
+ * @enc_p:	input and output data pointers for a cipher operation
+ * @enc_len:	cipher operation length
+ * @enc_key_p:	cipher key pointer
+ * @enc_iv:	cipher IV pointers
+ * @mac_src_p:	input pointer and total hash length
+ * @mac_digest:	digest pointer and hash operation length
+ * @mac_iv:	hmac IV pointers
+ *
+ * Structure passed to the CESA engine to describe the crypto operation
+ * to be executed.
+ */
+struct mv_cesa_sec_accel_desc {
+	u32 config;
+	u32 enc_p;
+	u32 enc_len;
+	u32 enc_key_p;
+	u32 enc_iv;
+	u32 mac_src_p;
+	u32 mac_digest;
+	u32 mac_iv;
+};
+
+/**
+ * struct mv_cesa_blkcipher_op_ctx - cipher operation context
+ * @key:	cipher key
+ * @iv:		cipher IV
+ *
+ * Context associated to a cipher operation.
+ */
+struct mv_cesa_blkcipher_op_ctx {
+	u32 key[8];
+	u32 iv[4];
+};
+
+/**
+ * struct mv_cesa_hash_op_ctx - hash or hmac operation context
+ * @iv:		initialization vectors
+ * @hash:	intermediate hash value
+ *
+ * Context associated to a hash or hmac operation.
+ */
+struct mv_cesa_hash_op_ctx {
+	u32 iv[16];
+	u32 hash[8];
+};
+
+/**
+ * struct mv_cesa_op_ctx - crypto operation context
+ * @desc:	CESA descriptor
+ * @ctx:	context associated to the crypto operation
+ *
+ * Context associated to a crypto operation.
+ */
+struct mv_cesa_op_ctx {
+	struct mv_cesa_sec_accel_desc desc;
+	union {
+		struct mv_cesa_blkcipher_op_ctx blkcipher;
+		struct mv_cesa_hash_op_ctx hash;
+	} ctx;
+};
+
+/* TDMA descriptor flags */
+#define CESA_TDMA_DST_IN_SRAM			BIT(31)
+#define CESA_TDMA_SRC_IN_SRAM			BIT(30)
+#define CESA_TDMA_TYPE_MSK			GENMASK(29, 0)
+#define CESA_TDMA_DUMMY				0
+#define CESA_TDMA_DATA				1
+#define CESA_TDMA_OP				2
+
+/**
+ * struct mv_cesa_tdma_desc - TDMA descriptor
+ * @byte_cnt:	number of bytes to transfer
+ * @src:	DMA address of the source
+ * @dst:	DMA address of the destination
+ * @next_dma:	DMA address of the next TDMA descriptor
+ * @cur_dma:	DMA address of this TDMA descriptor
+ * @next:	pointer to the next TDMA descriptor
+ * @op:		CESA operation attached to this TDMA descriptor
+ * @data:	raw data attached to this TDMA descriptor
+ * @flags:	flags describing the TDMA transfer. See the
+ *		"TDMA descriptor flags" section above
+ *
+ * TDMA descriptor used to create a transfer chain describing a crypto
+ * operation.
+ */
+struct mv_cesa_tdma_desc {
+	u32 byte_cnt;
+	u32 src;
+	u32 dst;
+	u32 next_dma;
+	u32 cur_dma;
+	struct mv_cesa_tdma_desc *next;
+	union {
+		struct mv_cesa_op_ctx *op;
+		void *data;
+	};
+	u32 flags;
+};
+
+/**
+ * struct mv_cesa_sg_dma_iter - scatter-gather iterator
+ * @dir:	transfer direction
+ * @sg:		scatter list
+ * @offset:	current position in the scatter list
+ * @op_offset:	current position in the crypto operation
+ *
+ * Iterator used to iterate over a scatterlist while creating a TDMA chain for
+ * a crypto operation.
+ */
+struct mv_cesa_sg_dma_iter {
+	enum dma_data_direction dir;
+	struct scatterlist *sg;
+	unsigned int offset;
+	unsigned int op_offset;
+};
+
+/**
+ * struct mv_cesa_dma_iter - crypto operation iterator
+ * @len:	the crypto operation length
+ * @offset:	current position in the crypto operation
+ * @op_len:	sub-operation length (the crypto engine can only act on 2 KB
+ *		chunks)
+ *
+ * Iterator used to create a TDMA chain for a given crypto operation.
+ */
+struct mv_cesa_dma_iter {
+	unsigned int len;
+	unsigned int offset;
+	unsigned int op_len;
+};
+
+/**
+ * struct mv_cesa_tdma_chain - TDMA chain
+ * @first:	first entry in the TDMA chain
+ * @last:	last entry in the TDMA chain
+ *
+ * Stores a TDMA chain for a specific crypto operation.
+ */
+struct mv_cesa_tdma_chain {
+	struct mv_cesa_tdma_desc *first;
+	struct mv_cesa_tdma_desc *last;
+};
+
+struct mv_cesa_engine;
+
+/**
+ * struct mv_cesa_caps - CESA device capabilities
+ * @nengines:		number of engines
+ * @has_tdma:		whether this device has a TDMA block
+ * @cipher_algs:	supported cipher algorithms
+ * @ncipher_algs:	number of supported cipher algorithms
+ * @ahash_algs:		supported hash algorithms
+ * @nahash_algs:	number of supported hash algorithms
+ *
+ * Structure used to describe CESA device capabilities.
+ */
+struct mv_cesa_caps {
+	int nengines;
+	bool has_tdma;
+	struct crypto_alg **cipher_algs;
+	int ncipher_algs;
+	struct ahash_alg **ahash_algs;
+	int nahash_algs;
+};
+
+/**
+ * struct mv_cesa_dev_dma - DMA pools
+ * @tdma_desc_pool:	TDMA desc pool
+ * @op_pool:		crypto operation pool
+ * @cache_pool:		data cache pool (used by hash implementation when the
+ *			hash request is smaller than the hash block size)
+ * @padding_pool:	padding pool (used by hash implementation when hardware
+ *			padding cannot be used)
+ *
+ * Structure containing the different DMA pools used by this driver.
+ */
+struct mv_cesa_dev_dma {
+	struct dma_pool *tdma_desc_pool;
+	struct dma_pool *op_pool;
+	struct dma_pool *cache_pool;
+	struct dma_pool *padding_pool;
+};
+
+/**
+ * struct mv_cesa_dev - CESA device
+ * @caps:	device capabilities
+ * @regs:	device registers
+ * @sram_size:	usable SRAM size
+ * @lock:	device lock
+ * @queue:	crypto request queue
+ * @engines:	array of engines
+ * @dma:	dma pools
+ *
+ * Structure storing CESA device information.
+ */
+struct mv_cesa_dev {
+	const struct mv_cesa_caps *caps;
+	void __iomem *regs;
+	struct device *dev;
+	unsigned int sram_size;
+	spinlock_t lock;
+	struct crypto_queue queue;
+	struct mv_cesa_engine *engines;
+	struct mv_cesa_dev_dma *dma;
+};
+
+/**
+ * struct mv_cesa_engine - CESA engine
+ * @id:			engine id
+ * @regs:		engine registers
+ * @sram:		SRAM memory region
+ * @sram_dma:		DMA address of the SRAM memory region
+ * @lock:		engine lock
+ * @req:		current crypto request
+ * @clk:		engine clk
+ * @zclk:		engine zclk
+ * @max_req_len:	maximum chunk length (useful to create the TDMA chain)
+ * @int_mask:		interrupt mask cache
+ * @pool:		memory pool pointing to the memory region reserved in
+ *			SRAM
+ *
+ * Structure storing CESA engine information.
+ */
+struct mv_cesa_engine {
+	int id;
+	void __iomem *regs;
+	void __iomem *sram;
+	dma_addr_t sram_dma;
+	spinlock_t lock;
+	struct crypto_async_request *req;
+	struct clk *clk;
+	struct clk *zclk;
+	size_t max_req_len;
+	u32 int_mask;
+	struct gen_pool *pool;
+};
+
+/**
+ * struct mv_cesa_req_ops - CESA request operations
+ * @prepare:	prepare a request to be executed on the specified engine
+ * @process:	process a request chunk result (should return 0 if the
+ *		operation, -EINPROGRESS if it needs more steps or an error
+ *		code)
+ * @step:	launch the crypto operation on the next chunk
+ * @cleanup:	cleanup the crypto request (release associated data)
+ */
+struct mv_cesa_req_ops {
+	void (*prepare)(struct crypto_async_request *req,
+			struct mv_cesa_engine *engine);
+	int (*process)(struct crypto_async_request *req, u32 status);
+	void (*step)(struct crypto_async_request *req);
+	void (*cleanup)(struct crypto_async_request *req);
+};
+
+/**
+ * struct mv_cesa_ctx - CESA operation context
+ * @ops:	crypto operations
+ *
+ * Base context structure inherited by operation specific ones.
+ */
+struct mv_cesa_ctx {
+	const struct mv_cesa_req_ops *ops;
+};
+
+/**
+ * struct mv_cesa_hash_ctx - CESA hash operation context
+ * @base:	base context structure
+ *
+ * Hash context structure.
+ */
+struct mv_cesa_hash_ctx {
+	struct mv_cesa_ctx base;
+};
+
+/**
+ * struct mv_cesa_hmac_ctx - CESA hmac operation context
+ * @base:	base context structure
+ * @iv:		initialization vectors
+ *
+ * HMAC context structure.
+ */
+struct mv_cesa_hmac_ctx {
+	struct mv_cesa_ctx base;
+	u32 iv[16];
+};
+
+/**
+ * enum mv_cesa_req_type - request type definitions
+ * @CESA_STD_REQ:	standard request
+ * @CESA_DMA_REQ:	DMA request
+ */
+enum mv_cesa_req_type {
+	CESA_STD_REQ,
+	CESA_DMA_REQ,
+};
+
+/**
+ * struct mv_cesa_req - CESA request
+ * @type:	request type
+ * @engine:	engine associated with this request
+ */
+struct mv_cesa_req {
+	enum mv_cesa_req_type type;
+	struct mv_cesa_engine *engine;
+};
+
+/**
+ * struct mv_cesa_tdma_req - CESA TDMA request
+ * @base:	base information
+ * @chain:	TDMA chain
+ */
+struct mv_cesa_tdma_req {
+	struct mv_cesa_req base;
+	struct mv_cesa_tdma_chain chain;
+};
+
+/**
+ * struct mv_cesa_sg_std_iter - CESA scatter-gather iterator for standard
+ *				requests
+ * @iter:	sg mapping iterator
+ * @offset:	current offset in the SG entry mapped in memory
+ */
+struct mv_cesa_sg_std_iter {
+	struct sg_mapping_iter iter;
+	unsigned int offset;
+};
+
+/**
+ * struct mv_cesa_ablkcipher_std_req - cipher standard request
+ * @base:	base information
+ * @op:		operation context
+ * @offset:	current operation offset
+ * @size:	size of the crypto operation
+ * @skip_ctx:	when true, only the descriptor (not the whole operation
+ *		context) is copied to SRAM on subsequent steps
+ */
+struct mv_cesa_ablkcipher_std_req {
+	struct mv_cesa_req base;
+	struct mv_cesa_op_ctx op;
+	unsigned int offset;
+	unsigned int size;
+	bool skip_ctx;
+};
+
+/**
+ * struct mv_cesa_ablkcipher_req - cipher request
+ * @req:	type specific request information
+ * @src_nents:	number of entries in the src sg list
+ * @dst_nents:	number of entries in the dest sg list
+ */
+struct mv_cesa_ablkcipher_req {
+	union {
+		struct mv_cesa_req base;
+		struct mv_cesa_tdma_req dma;
+		struct mv_cesa_ablkcipher_std_req std;
+	} req;
+	int src_nents;
+	int dst_nents;
+};
+
+/**
+ * struct mv_cesa_ahash_std_req - standard hash request
+ * @base:	base information
+ * @offset:	current operation offset
+ */
+struct mv_cesa_ahash_std_req {
+	struct mv_cesa_req base;
+	unsigned int offset;
+};
+
+/**
+ * struct mv_cesa_ahash_dma_req - DMA hash request
+ * @base:		base information
+ * @padding:		padding buffer
+ * @padding_dma:	DMA address of the padding buffer
+ * @cache_dma:		DMA address of the cache buffer
+ */
+struct mv_cesa_ahash_dma_req {
+	struct mv_cesa_tdma_req base;
+	u8 *padding;
+	dma_addr_t padding_dma;
+	dma_addr_t cache_dma;
+};
+
+/**
+ * struct mv_cesa_ahash_req - hash request
+ * @req:		type specific request information
+ * @op_tmpl:		operation context template
+ * @cache:		cache buffer
+ * @cache_ptr:		write pointer in the cache buffer
+ * @len:		hash total length
+ * @src_nents:		number of entries in the scatterlist
+ * @last_req:		defines whether the current operation is the last one
+ * @state:		hash state
+ */
+struct mv_cesa_ahash_req {
+	union {
+		struct mv_cesa_req base;
+		struct mv_cesa_ahash_dma_req dma;
+		struct mv_cesa_ahash_std_req std;
+	} req;
+	struct mv_cesa_op_ctx op_tmpl;
+	u8 *cache;
+	unsigned int cache_ptr;
+	u64 len;
+	int src_nents;
+	bool last_req;
+	__be32 state[8];
+};
+
+/* CESA functions */
+
+extern struct mv_cesa_dev *cesa_dev;
+
+static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op,
+					 u32 cfg, u32 mask)
+{
+	op->desc.config &= cpu_to_le32(~mask);
+	op->desc.config |= cpu_to_le32(cfg);
+}
+
+static inline u32 mv_cesa_get_op_cfg(struct mv_cesa_op_ctx *op)
+{
+	return le32_to_cpu(op->desc.config);
+}
+
+static inline void mv_cesa_set_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg)
+{
+	op->desc.config = cpu_to_le32(cfg);
+}
+
+static inline void mv_cesa_adjust_op(struct mv_cesa_engine *engine,
+				     struct mv_cesa_op_ctx *op)
+{
+	u32 offset = engine->sram_dma & CESA_SA_SRAM_MSK;
+
+	op->desc.enc_p = CESA_SA_DESC_CRYPT_DATA(offset);
+	op->desc.enc_key_p = CESA_SA_DESC_CRYPT_KEY(offset);
+	op->desc.enc_iv = CESA_SA_DESC_CRYPT_IV(offset);
+	op->desc.mac_src_p &= ~CESA_SA_DESC_MAC_DATA_MSK;
+	op->desc.mac_src_p |= CESA_SA_DESC_MAC_DATA(offset);
+	op->desc.mac_digest &= ~CESA_SA_DESC_MAC_DIGEST_MSK;
+	op->desc.mac_digest |= CESA_SA_DESC_MAC_DIGEST(offset);
+	op->desc.mac_iv = CESA_SA_DESC_MAC_IV(offset);
+}
+
+static inline void mv_cesa_set_crypt_op_len(struct mv_cesa_op_ctx *op, int len)
+{
+	op->desc.enc_len = cpu_to_le32(len);
+}
+
+static inline void mv_cesa_set_mac_op_total_len(struct mv_cesa_op_ctx *op,
+						int len)
+{
+	op->desc.mac_src_p &= ~CESA_SA_DESC_MAC_TOTAL_LEN_MSK;
+	op->desc.mac_src_p |= CESA_SA_DESC_MAC_TOTAL_LEN(len);
+}
+
+static inline void mv_cesa_set_mac_op_frag_len(struct mv_cesa_op_ctx *op,
+					       int len)
+{
+	op->desc.mac_digest &= ~CESA_SA_DESC_MAC_FRAG_LEN_MSK;
+	op->desc.mac_digest |= CESA_SA_DESC_MAC_FRAG_LEN(len);
+}
+
+static inline void mv_cesa_set_int_mask(struct mv_cesa_engine *engine,
+					u32 int_mask)
+{
+	if (int_mask == engine->int_mask)
+		return;
+
+	writel(int_mask, engine->regs + CESA_SA_INT_MSK);
+	engine->int_mask = int_mask;
+}
+
+static inline u32 mv_cesa_get_int_mask(struct mv_cesa_engine *engine)
+{
+	return engine->int_mask;
+}
+
+int mv_cesa_queue_req(struct crypto_async_request *req);
+
+/* TDMA functions */
+
+static inline void mv_cesa_req_dma_iter_init(struct mv_cesa_dma_iter *iter,
+					     unsigned int len)
+{
+	iter->len = len;
+	iter->op_len = min(len, CESA_SA_SRAM_PAYLOAD_SIZE);
+	iter->offset = 0;
+}
+
+static inline void mv_cesa_sg_dma_iter_init(struct mv_cesa_sg_dma_iter *iter,
+					    struct scatterlist *sg,
+					    enum dma_data_direction dir)
+{
+	iter->op_offset = 0;
+	iter->offset = 0;
+	iter->sg = sg;
+	iter->dir = dir;
+}
+
+static inline unsigned int
+mv_cesa_req_dma_iter_transfer_len(struct mv_cesa_dma_iter *iter,
+				  struct mv_cesa_sg_dma_iter *sgiter)
+{
+	return min(iter->op_len - sgiter->op_offset,
+		   sg_dma_len(sgiter->sg) - sgiter->offset);
+}
+
+bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *chain,
+					struct mv_cesa_sg_dma_iter *sgiter,
+					unsigned int len);
+
+static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter)
+{
+	iter->offset += iter->op_len;
+	iter->op_len = min(iter->len - iter->offset,
+			   CESA_SA_SRAM_PAYLOAD_SIZE);
+
+	return iter->op_len;
+}
+
+void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq);
+
+static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq,
+				      u32 status)
+{
+	if (!(status & CESA_SA_INT_ACC0_IDMA_DONE))
+		return -EINPROGRESS;
+
+	if (status & CESA_SA_INT_IDMA_OWN_ERR)
+		return -EINVAL;
+
+	return 0;
+}
+
+void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq,
+			 struct mv_cesa_engine *engine);
+
+void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq);
+
+static inline void
+mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
+{
+	memset(chain, 0, sizeof(*chain));
+}
+
+struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
+					const struct mv_cesa_op_ctx *op_templ,
+					bool skip_ctx,
+					gfp_t flags);
+
+int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain,
+				  dma_addr_t dst, dma_addr_t src, u32 size,
+				  u32 flags, gfp_t gfp_flags);
+
+int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain,
+				 u32 flags);
+
+int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags);
+
+int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain,
+				 struct mv_cesa_dma_iter *dma_iter,
+				 struct mv_cesa_sg_dma_iter *sgiter,
+				 gfp_t gfp_flags);
+
+/* Algorithm definitions */
+
+extern struct ahash_alg mv_md5_alg;
+extern struct ahash_alg mv_sha1_alg;
+extern struct ahash_alg mv_sha256_alg;
+extern struct ahash_alg mv_ahmac_md5_alg;
+extern struct ahash_alg mv_ahmac_sha1_alg;
+extern struct ahash_alg mv_ahmac_sha256_alg;
+
+extern struct crypto_alg mv_cesa_ecb_des_alg;
+extern struct crypto_alg mv_cesa_cbc_des_alg;
+extern struct crypto_alg mv_cesa_ecb_des3_ede_alg;
+extern struct crypto_alg mv_cesa_cbc_des3_ede_alg;
+extern struct crypto_alg mv_cesa_ecb_aes_alg;
+extern struct crypto_alg mv_cesa_cbc_aes_alg;
+
+#endif /* __MARVELL_CESA_H__ */

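The CESA_SA_DESC_CFG_* values above are field encodings OR-ed into the 32-bit config word of the accelerator descriptor, and the *_MSK macros delimit each field for the read-modify-write done by mv_cesa_update_op_cfg() (on the little-endian copy in the real driver). A sketch composing a CBC-AES-128 encrypt config and then flipping only the direction field; CFG_DIR_MSK here is a local shorthand, since the header defines no direction mask macro:

#include <stdint.h>
#include <stdio.h>

/* Relevant field encodings, copied from the header above. */
#define CFG_OP_CRYPT_ONLY	1
#define CFG_CRYPTM_AES		(3 << 8)
#define CFG_DIR_ENC		(0 << 12)
#define CFG_DIR_DEC		(1 << 12)
#define CFG_DIR_MSK		(1 << 12)	/* local shorthand */
#define CFG_CRYPTCM_CBC		(1 << 16)
#define CFG_AES_LEN_128		(0 << 24)

/* Same shape as mv_cesa_update_op_cfg(), minus the cpu_to_le32(). */
static void update_cfg(uint32_t *config, uint32_t cfg, uint32_t mask)
{
	*config &= ~mask;
	*config |= cfg;
}

int main(void)
{
	uint32_t config = CFG_OP_CRYPT_ONLY | CFG_CRYPTM_AES |
			  CFG_DIR_ENC | CFG_CRYPTCM_CBC | CFG_AES_LEN_128;

	printf("enc config = 0x%08x\n", config);
	update_cfg(&config, CFG_DIR_DEC, CFG_DIR_MSK);	/* switch to decrypt */
	printf("dec config = 0x%08x\n", config);
	return 0;
}
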
+ 797 - 0
drivers/crypto/marvell/cipher.c

@@ -0,0 +1,797 @@
+/*
+ * Cipher algorithms supported by the CESA: DES, 3DES and AES.
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ * Author: Arnaud Ebalard <arno@natisbad.org>
+ *
+ * This work is based on an initial version written by
+ * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc >
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/des.h>
+
+#include "cesa.h"
+
+struct mv_cesa_des_ctx {
+	struct mv_cesa_ctx base;
+	u8 key[DES_KEY_SIZE];
+};
+
+struct mv_cesa_des3_ctx {
+	struct mv_cesa_ctx base;
+	u8 key[DES3_EDE_KEY_SIZE];
+};
+
+struct mv_cesa_aes_ctx {
+	struct mv_cesa_ctx base;
+	struct crypto_aes_ctx aes;
+};
+
+struct mv_cesa_ablkcipher_dma_iter {
+	struct mv_cesa_dma_iter base;
+	struct mv_cesa_sg_dma_iter src;
+	struct mv_cesa_sg_dma_iter dst;
+};
+
+static inline void
+mv_cesa_ablkcipher_req_iter_init(struct mv_cesa_ablkcipher_dma_iter *iter,
+				 struct ablkcipher_request *req)
+{
+	mv_cesa_req_dma_iter_init(&iter->base, req->nbytes);
+	mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE);
+	mv_cesa_sg_dma_iter_init(&iter->dst, req->dst, DMA_FROM_DEVICE);
+}
+
+static inline bool
+mv_cesa_ablkcipher_req_iter_next_op(struct mv_cesa_ablkcipher_dma_iter *iter)
+{
+	iter->src.op_offset = 0;
+	iter->dst.op_offset = 0;
+
+	return mv_cesa_req_dma_iter_next_op(&iter->base);
+}
+
+static inline void
+mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+
+	if (req->dst != req->src) {
+		dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents,
+			     DMA_FROM_DEVICE);
+		dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents,
+			     DMA_TO_DEVICE);
+	} else {
+		dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents,
+			     DMA_BIDIRECTIONAL);
+	}
+	mv_cesa_dma_cleanup(&creq->req.dma);
+}
+
+static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_ablkcipher_dma_cleanup(req);
+}
+
+static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std;
+	struct mv_cesa_engine *engine = sreq->base.engine;
+	size_t  len = min_t(size_t, req->nbytes - sreq->offset,
+			    CESA_SA_SRAM_PAYLOAD_SIZE);
+
+	len = sg_pcopy_to_buffer(req->src, creq->src_nents,
+				 engine->sram + CESA_SA_DATA_SRAM_OFFSET,
+				 len, sreq->offset);
+
+	sreq->size = len;
+	mv_cesa_set_crypt_op_len(&sreq->op, len);
+
+	/* FIXME: only update enc_len field */
+	if (!sreq->skip_ctx) {
+		memcpy(engine->sram, &sreq->op, sizeof(sreq->op));
+		sreq->skip_ctx = true;
+	} else {
+		memcpy(engine->sram, &sreq->op, sizeof(sreq->op.desc));
+	}
+
+	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE);
+	writel(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
+	writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
+}
+
+static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req,
+					  u32 status)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std;
+	struct mv_cesa_engine *engine = sreq->base.engine;
+	size_t len;
+
+	len = sg_pcopy_from_buffer(req->dst, creq->dst_nents,
+				   engine->sram + CESA_SA_DATA_SRAM_OFFSET,
+				   sreq->size, sreq->offset);
+
+	sreq->offset += len;
+	if (sreq->offset < req->nbytes)
+		return -EINPROGRESS;
+
+	return 0;
+}
+
+static int mv_cesa_ablkcipher_process(struct crypto_async_request *req,
+				      u32 status)
+{
+	struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req);
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq);
+	struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std;
+	struct mv_cesa_engine *engine = sreq->base.engine;
+	int ret;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		ret = mv_cesa_dma_process(&creq->req.dma, status);
+	else
+		ret = mv_cesa_ablkcipher_std_process(ablkreq, status);
+
+	if (ret)
+		return ret;
+
+	memcpy(ablkreq->info, engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET,
+	       crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)));
+
+	return 0;
+}
+
+static void mv_cesa_ablkcipher_step(struct crypto_async_request *req)
+{
+	struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req);
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq);
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_dma_step(&creq->req.dma);
+	else
+		mv_cesa_ablkcipher_std_step(ablkreq);
+}
+
+static inline void
+mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct mv_cesa_tdma_req *dreq = &creq->req.dma;
+
+	mv_cesa_dma_prepare(dreq, dreq->base.engine);
+}
+
+static inline void
+mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std;
+	struct mv_cesa_engine *engine = sreq->base.engine;
+
+	sreq->size = 0;
+	sreq->offset = 0;
+	mv_cesa_adjust_op(engine, &sreq->op);
+	memcpy(engine->sram, &sreq->op, sizeof(sreq->op));
+}
+
+static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req,
+					      struct mv_cesa_engine *engine)
+{
+	struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req);
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq);
+
+	creq->req.base.engine = engine;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_ablkcipher_dma_prepare(ablkreq);
+	else
+		mv_cesa_ablkcipher_std_prepare(ablkreq);
+}
+
+static inline void
+mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req)
+{
+	struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req);
+
+	mv_cesa_ablkcipher_cleanup(ablkreq);
+}
+
+static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = {
+	.step = mv_cesa_ablkcipher_step,
+	.process = mv_cesa_ablkcipher_process,
+	.prepare = mv_cesa_ablkcipher_prepare,
+	.cleanup = mv_cesa_ablkcipher_req_cleanup,
+};
+
+static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm)
+{
+	struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->base.ops = &mv_cesa_ablkcipher_req_ops;
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct mv_cesa_ablkcipher_req);
+
+	return 0;
+}
+
+static int mv_cesa_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int remaining;
+	int offset;
+	int ret;
+	int i;
+
+	ret = crypto_aes_expand_key(&ctx->aes, key, len);
+	if (ret) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
+	remaining = (ctx->aes.key_length - 16) / 4;
+	offset = ctx->aes.key_length + 24 - remaining;
+	for (i = 0; i < remaining; i++)
+		ctx->aes.key_dec[4 + i] =
+			cpu_to_le32(ctx->aes.key_enc[offset + i]);
+
+	return 0;
+}
+
+static int mv_cesa_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 tmp[DES_EXPKEY_WORDS];
+	int ret;
+
+	if (len != DES_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ret = des_ekey(tmp, key);
+	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, DES_KEY_SIZE);
+
+	return 0;
+}
+
+static int mv_cesa_des3_ede_setkey(struct crypto_ablkcipher *cipher,
+				   const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (len != DES3_EDE_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, DES3_EDE_KEY_SIZE);
+
+	return 0;
+}
+
+static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
+				const struct mv_cesa_op_ctx *op_templ)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	struct mv_cesa_tdma_req *dreq = &creq->req.dma;
+	struct mv_cesa_ablkcipher_dma_iter iter;
+	struct mv_cesa_tdma_chain chain;
+	bool skip_ctx = false;
+	int ret;
+
+	dreq->base.type = CESA_DMA_REQ;
+	dreq->chain.first = NULL;
+	dreq->chain.last = NULL;
+
+	if (req->src != req->dst) {
+		ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents,
+				 DMA_TO_DEVICE);
+		if (!ret)
+			return -ENOMEM;
+
+		ret = dma_map_sg(cesa_dev->dev, req->dst, creq->dst_nents,
+				 DMA_FROM_DEVICE);
+		if (!ret) {
+			ret = -ENOMEM;
+			goto err_unmap_src;
+		}
+	} else {
+		ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents,
+				 DMA_BIDIRECTIONAL);
+		if (!ret)
+			return -ENOMEM;
+	}
+
+	mv_cesa_tdma_desc_iter_init(&chain);
+	mv_cesa_ablkcipher_req_iter_init(&iter, req);
+
+	do {
+		struct mv_cesa_op_ctx *op;
+
+		op = mv_cesa_dma_add_op(&chain, op_templ, skip_ctx, flags);
+		if (IS_ERR(op)) {
+			ret = PTR_ERR(op);
+			goto err_free_tdma;
+		}
+		skip_ctx = true;
+
+		mv_cesa_set_crypt_op_len(op, iter.base.op_len);
+
+		/* Add input transfers */
+		ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base,
+						   &iter.src, flags);
+		if (ret)
+			goto err_free_tdma;
+
+		/* Add dummy desc to launch the crypto operation */
+		ret = mv_cesa_dma_add_dummy_launch(&chain, flags);
+		if (ret)
+			goto err_free_tdma;
+
+		/* Add output transfers */
+		ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base,
+						   &iter.dst, flags);
+		if (ret)
+			goto err_free_tdma;
+
+	} while (mv_cesa_ablkcipher_req_iter_next_op(&iter));
+
+	dreq->chain = chain;
+
+	return 0;
+
+err_free_tdma:
+	mv_cesa_dma_cleanup(dreq);
+	if (req->dst != req->src)
+		dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents,
+			     DMA_FROM_DEVICE);
+
+err_unmap_src:
+	dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents,
+		     req->dst != req->src ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
+
+	return ret;
+}
+
+static inline int
+mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req,
+				const struct mv_cesa_op_ctx *op_templ)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std;
+
+	sreq->base.type = CESA_STD_REQ;
+	sreq->op = *op_templ;
+	sreq->skip_ctx = false;
+
+	return 0;
+}
+
+static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req,
+				       struct mv_cesa_op_ctx *tmpl)
+{
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	unsigned int blksize = crypto_ablkcipher_blocksize(tfm);
+	int ret;
+
+	if (!IS_ALIGNED(req->nbytes, blksize))
+		return -EINVAL;
+
+	creq->src_nents = sg_nents_for_len(req->src, req->nbytes);
+	creq->dst_nents = sg_nents_for_len(req->dst, req->nbytes);
+
+	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY,
+			      CESA_SA_DESC_CFG_OP_MSK);
+
+	/* TODO: add a threshold for DMA usage */
+	if (cesa_dev->caps->has_tdma)
+		ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl);
+	else
+		ret = mv_cesa_ablkcipher_std_req_init(req, tmpl);
+
+	return ret;
+}
+
+static int mv_cesa_des_op(struct ablkcipher_request *req,
+			  struct mv_cesa_op_ctx *tmpl)
+{
+	struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	int ret;
+
+	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES,
+			      CESA_SA_DESC_CFG_CRYPTM_MSK);
+
+	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE);
+
+	ret = mv_cesa_ablkcipher_req_init(req, tmpl);
+	if (ret)
+		return ret;
+
+	ret = mv_cesa_queue_req(&req->base);
+	if (ret && ret != -EINPROGRESS)
+		mv_cesa_ablkcipher_cleanup(req);
+
+	return ret;
+}
+
+static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_ECB |
+			   CESA_SA_DESC_CFG_DIR_ENC);
+
+	return mv_cesa_des_op(req, &tmpl);
+}
+
+static int mv_cesa_ecb_des_decrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_ECB |
+			   CESA_SA_DESC_CFG_DIR_DEC);
+
+	return mv_cesa_des_op(req, &tmpl);
+}
+
+struct crypto_alg mv_cesa_ecb_des_alg = {
+	.cra_name = "ecb(des)",
+	.cra_driver_name = "mv-ecb-des",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		     CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct mv_cesa_des_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = mv_cesa_ablkcipher_cra_init,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.setkey = mv_cesa_des_setkey,
+			.encrypt = mv_cesa_ecb_des_encrypt,
+			.decrypt = mv_cesa_ecb_des_decrypt,
+		},
+	},
+};
+
+static int mv_cesa_cbc_des_op(struct ablkcipher_request *req,
+			      struct mv_cesa_op_ctx *tmpl)
+{
+	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC,
+			      CESA_SA_DESC_CFG_CRYPTCM_MSK);
+
+	memcpy(tmpl->ctx.blkcipher.iv, req->info, DES_BLOCK_SIZE);
+
+	return mv_cesa_des_op(req, tmpl);
+}
+
+static int mv_cesa_cbc_des_encrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_ENC);
+
+	return mv_cesa_cbc_des_op(req, &tmpl);
+}
+
+static int mv_cesa_cbc_des_decrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_DEC);
+
+	return mv_cesa_cbc_des_op(req, &tmpl);
+}
+
+struct crypto_alg mv_cesa_cbc_des_alg = {
+	.cra_name = "cbc(des)",
+	.cra_driver_name = "mv-cbc-des",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		     CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct mv_cesa_des_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = mv_cesa_ablkcipher_cra_init,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = DES_KEY_SIZE,
+			.max_keysize = DES_KEY_SIZE,
+			.ivsize	     = DES_BLOCK_SIZE,
+			.setkey = mv_cesa_des_setkey,
+			.encrypt = mv_cesa_cbc_des_encrypt,
+			.decrypt = mv_cesa_cbc_des_decrypt,
+		},
+	},
+};
+
+static int mv_cesa_des3_op(struct ablkcipher_request *req,
+			   struct mv_cesa_op_ctx *tmpl)
+{
+	struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	int ret;
+
+	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES,
+			      CESA_SA_DESC_CFG_CRYPTM_MSK);
+
+	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE);
+
+	ret = mv_cesa_ablkcipher_req_init(req, tmpl);
+	if (ret)
+		return ret;
+
+	ret = mv_cesa_queue_req(&req->base);
+	if (ret && ret != -EINPROGRESS)
+		mv_cesa_ablkcipher_cleanup(req);
+
+	return ret;
+}
+
+static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_ECB |
+			   CESA_SA_DESC_CFG_3DES_EDE |
+			   CESA_SA_DESC_CFG_DIR_ENC);
+
+	return mv_cesa_des3_op(req, &tmpl);
+}
+
+static int mv_cesa_ecb_des3_ede_decrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_ECB |
+			   CESA_SA_DESC_CFG_3DES_EDE |
+			   CESA_SA_DESC_CFG_DIR_DEC);
+
+	return mv_cesa_des3_op(req, &tmpl);
+}
+
+struct crypto_alg mv_cesa_ecb_des3_ede_alg = {
+	.cra_name = "ecb(des3_ede)",
+	.cra_driver_name = "mv-ecb-des3-ede",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		     CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct mv_cesa_des3_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = mv_cesa_ablkcipher_cra_init,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize	     = DES3_EDE_BLOCK_SIZE,
+			.setkey = mv_cesa_des3_ede_setkey,
+			.encrypt = mv_cesa_ecb_des3_ede_encrypt,
+			.decrypt = mv_cesa_ecb_des3_ede_decrypt,
+		},
+	},
+};
+
+static int mv_cesa_cbc_des3_op(struct ablkcipher_request *req,
+			       struct mv_cesa_op_ctx *tmpl)
+{
+	memcpy(tmpl->ctx.blkcipher.iv, req->info, DES3_EDE_BLOCK_SIZE);
+
+	return mv_cesa_des3_op(req, tmpl);
+}
+
+static int mv_cesa_cbc_des3_ede_encrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_CBC |
+			   CESA_SA_DESC_CFG_3DES_EDE |
+			   CESA_SA_DESC_CFG_DIR_ENC);
+
+	return mv_cesa_cbc_des3_op(req, &tmpl);
+}
+
+static int mv_cesa_cbc_des3_ede_decrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_CBC |
+			   CESA_SA_DESC_CFG_3DES_EDE |
+			   CESA_SA_DESC_CFG_DIR_DEC);
+
+	return mv_cesa_cbc_des3_op(req, &tmpl);
+}
+
+struct crypto_alg mv_cesa_cbc_des3_ede_alg = {
+	.cra_name = "cbc(des3_ede)",
+	.cra_driver_name = "mv-cbc-des3-ede",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		     CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct mv_cesa_des3_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = mv_cesa_ablkcipher_cra_init,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize	     = DES3_EDE_BLOCK_SIZE,
+			.setkey = mv_cesa_des3_ede_setkey,
+			.encrypt = mv_cesa_cbc_des3_ede_encrypt,
+			.decrypt = mv_cesa_cbc_des3_ede_decrypt,
+		},
+	},
+};
+
+static int mv_cesa_aes_op(struct ablkcipher_request *req,
+			  struct mv_cesa_op_ctx *tmpl)
+{
+	struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	int ret, i;
+	u32 *key;
+	u32 cfg;
+
+	cfg = CESA_SA_DESC_CFG_CRYPTM_AES;
+
+	if (mv_cesa_get_op_cfg(tmpl) & CESA_SA_DESC_CFG_DIR_DEC)
+		key = ctx->aes.key_dec;
+	else
+		key = ctx->aes.key_enc;
+
+	for (i = 0; i < ctx->aes.key_length / sizeof(u32); i++)
+		tmpl->ctx.blkcipher.key[i] = cpu_to_le32(key[i]);
+
+	if (ctx->aes.key_length == 24)
+		cfg |= CESA_SA_DESC_CFG_AES_LEN_192;
+	else if (ctx->aes.key_length == 32)
+		cfg |= CESA_SA_DESC_CFG_AES_LEN_256;
+
+	mv_cesa_update_op_cfg(tmpl, cfg,
+			      CESA_SA_DESC_CFG_CRYPTM_MSK |
+			      CESA_SA_DESC_CFG_AES_LEN_MSK);
+
+	ret = mv_cesa_ablkcipher_req_init(req, tmpl);
+	if (ret)
+		return ret;
+
+	ret = mv_cesa_queue_req(&req->base);
+	if (ret && ret != -EINPROGRESS)
+		mv_cesa_ablkcipher_cleanup(req);
+
+	return ret;
+}
+
+static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_ECB |
+			   CESA_SA_DESC_CFG_DIR_ENC);
+
+	return mv_cesa_aes_op(req, &tmpl);
+}
+
+static int mv_cesa_ecb_aes_decrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl,
+			   CESA_SA_DESC_CFG_CRYPTCM_ECB |
+			   CESA_SA_DESC_CFG_DIR_DEC);
+
+	return mv_cesa_aes_op(req, &tmpl);
+}
+
+struct crypto_alg mv_cesa_ecb_aes_alg = {
+	.cra_name = "ecb(aes)",
+	.cra_driver_name = "mv-ecb-aes",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		     CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct mv_cesa_aes_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = mv_cesa_ablkcipher_cra_init,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.setkey = mv_cesa_aes_setkey,
+			.encrypt = mv_cesa_ecb_aes_encrypt,
+			.decrypt = mv_cesa_ecb_aes_decrypt,
+		},
+	},
+};
+
+static int mv_cesa_cbc_aes_op(struct ablkcipher_request *req,
+			      struct mv_cesa_op_ctx *tmpl)
+{
+	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC,
+			      CESA_SA_DESC_CFG_CRYPTCM_MSK);
+	memcpy(tmpl->ctx.blkcipher.iv, req->info, AES_BLOCK_SIZE);
+
+	return mv_cesa_aes_op(req, tmpl);
+}
+
+static int mv_cesa_cbc_aes_encrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_ENC);
+
+	return mv_cesa_cbc_aes_op(req, &tmpl);
+}
+
+static int mv_cesa_cbc_aes_decrypt(struct ablkcipher_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_DEC);
+
+	return mv_cesa_cbc_aes_op(req, &tmpl);
+}
+
+struct crypto_alg mv_cesa_cbc_aes_alg = {
+	.cra_name = "cbc(aes)",
+	.cra_driver_name = "mv-cbc-aes",
+	.cra_priority = 300,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+		     CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct mv_cesa_aes_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = mv_cesa_ablkcipher_cra_init,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			.setkey = mv_cesa_aes_setkey,
+			.encrypt = mv_cesa_cbc_aes_encrypt,
+			.decrypt = mv_cesa_cbc_aes_decrypt,
+		},
+	},
+};

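The cipher code above registers asynchronous (ablkcipher) implementations only. For orientation, here is a minimal sketch (not part of the patch) of how an in-kernel caller would drive one of these algorithms, e.g. "cbc(aes)", through the 4.2-era ablkcipher API; the enc_done() callback and encrypt_one_block() wrapper are names invented for the example.

#include <linux/completion.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

struct enc_result {
	struct completion done;
	int err;
};

static void enc_done(struct crypto_async_request *areq, int err)
{
	struct enc_result *res = areq->data;

	/* -EINPROGRESS only signals that a backlogged request started. */
	if (err == -EINPROGRESS)
		return;
	res->err = err;
	complete(&res->done);
}

/* Encrypt @len bytes of @buf in place; @len must be a block multiple. */
static int encrypt_one_block(u8 *buf, unsigned int len,
			     const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	struct scatterlist sg;
	struct enc_result res;
	int ret;

	tfm = crypto_alloc_ablkcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_ablkcipher_setkey(tfm, key, keylen);
	if (ret)
		goto out_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_tfm;
	}

	init_completion(&res.done);
	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					enc_done, &res);
	sg_init_one(&sg, buf, len);
	ablkcipher_request_set_crypt(req, &sg, &sg, len, iv);

	ret = crypto_ablkcipher_encrypt(req);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		wait_for_completion(&res.done);
		ret = res.err;
	}

	ablkcipher_request_free(req);
out_tfm:
	crypto_free_ablkcipher(tfm);
	return ret;
}

Because the driver sets CRYPTO_ALG_ASYNC, crypto_ablkcipher_encrypt() normally returns -EINPROGRESS (or -EBUSY when backlogged) and the result arrives through the callback, hence the completion plumbing above.
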
+ 1441 - 0
drivers/crypto/marvell/hash.c

@@ -0,0 +1,1441 @@
+/*
+ * Hash algorithms supported by the CESA: MD5, SHA1 and SHA256.
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ * Author: Arnaud Ebalard <arno@natisbad.org>
+ *
+ * This work is based on an initial version written by
+ * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc >
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#include "cesa.h"
+
+struct mv_cesa_ahash_dma_iter {
+	struct mv_cesa_dma_iter base;
+	struct mv_cesa_sg_dma_iter src;
+};
+
+static inline void
+mv_cesa_ahash_req_iter_init(struct mv_cesa_ahash_dma_iter *iter,
+			    struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int len = req->nbytes;
+
+	if (!creq->last_req)
+		len = (len + creq->cache_ptr) & ~CESA_HASH_BLOCK_SIZE_MSK;
+
+	mv_cesa_req_dma_iter_init(&iter->base, len);
+	mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE);
+	iter->src.op_offset = creq->cache_ptr;
+}
+
+static inline bool
+mv_cesa_ahash_req_iter_next_op(struct mv_cesa_ahash_dma_iter *iter)
+{
+	iter->src.op_offset = 0;
+
+	return mv_cesa_req_dma_iter_next_op(&iter->base);
+}
+
+static inline int mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_req *creq,
+						gfp_t flags)
+{
+	struct mv_cesa_ahash_dma_req *dreq = &creq->req.dma;
+
+	creq->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags,
+				     &dreq->cache_dma);
+	if (!creq->cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static inline int mv_cesa_ahash_std_alloc_cache(struct mv_cesa_ahash_req *creq,
+						gfp_t flags)
+{
+	creq->cache = kzalloc(CESA_MAX_HASH_BLOCK_SIZE, flags);
+	if (!creq->cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int mv_cesa_ahash_alloc_cache(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	int ret;
+
+	if (creq->cache)
+		return 0;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		ret = mv_cesa_ahash_dma_alloc_cache(creq, flags);
+	else
+		ret = mv_cesa_ahash_std_alloc_cache(creq, flags);
+
+	return ret;
+}
+
+static inline void mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_req *creq)
+{
+	dma_pool_free(cesa_dev->dma->cache_pool, creq->cache,
+		      creq->req.dma.cache_dma);
+}
+
+static inline void mv_cesa_ahash_std_free_cache(struct mv_cesa_ahash_req *creq)
+{
+	kfree(creq->cache);
+}
+
+static void mv_cesa_ahash_free_cache(struct mv_cesa_ahash_req *creq)
+{
+	if (!creq->cache)
+		return;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_ahash_dma_free_cache(creq);
+	else
+		mv_cesa_ahash_std_free_cache(creq);
+
+	creq->cache = NULL;
+}
+
+static int mv_cesa_ahash_dma_alloc_padding(struct mv_cesa_ahash_dma_req *req,
+					   gfp_t flags)
+{
+	if (req->padding)
+		return 0;
+
+	req->padding = dma_pool_alloc(cesa_dev->dma->padding_pool, flags,
+				      &req->padding_dma);
+	if (!req->padding)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void mv_cesa_ahash_dma_free_padding(struct mv_cesa_ahash_dma_req *req)
+{
+	if (!req->padding)
+		return;
+
+	dma_pool_free(cesa_dev->dma->padding_pool, req->padding,
+		      req->padding_dma);
+	req->padding = NULL;
+}
+
+static inline void mv_cesa_ahash_dma_last_cleanup(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+
+	mv_cesa_ahash_dma_free_padding(&creq->req.dma);
+}
+
+static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+
+	dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE);
+	mv_cesa_dma_cleanup(&creq->req.dma.base);
+}
+
+static inline void mv_cesa_ahash_cleanup(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_ahash_dma_cleanup(req);
+}
+
+static void mv_cesa_ahash_last_cleanup(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+
+	mv_cesa_ahash_free_cache(creq);
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_ahash_dma_last_cleanup(req);
+}
+
+static int mv_cesa_ahash_pad_len(struct mv_cesa_ahash_req *creq)
+{
+	unsigned int index, padlen;
+
+	index = creq->len & CESA_HASH_BLOCK_SIZE_MSK;
+	padlen = (index < 56) ? (56 - index) : (64 + 56 - index);
+
+	return padlen;
+}
+
+static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf)
+{
+	__be64 bits = cpu_to_be64(creq->len << 3);
+	unsigned int padlen;
+
+	buf[0] = 0x80;
+	/* Pad out to 56 mod 64 */
+	padlen = mv_cesa_ahash_pad_len(creq);
+	memset(buf + 1, 0, padlen - 1);
+	memcpy(buf + padlen, &bits, sizeof(bits));
+
+	return padlen + 8;
+}
+
+static void mv_cesa_ahash_std_step(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_ahash_std_req *sreq = &creq->req.std;
+	struct mv_cesa_engine *engine = sreq->base.engine;
+	struct mv_cesa_op_ctx *op;
+	unsigned int new_cache_ptr = 0;
+	u32 frag_mode;
+	size_t len;
+
+	if (creq->cache_ptr)
+		memcpy(engine->sram + CESA_SA_DATA_SRAM_OFFSET, creq->cache,
+		       creq->cache_ptr);
+
+	len = min_t(size_t, req->nbytes + creq->cache_ptr - sreq->offset,
+		    CESA_SA_SRAM_PAYLOAD_SIZE);
+
+	if (!creq->last_req) {
+		new_cache_ptr = len & CESA_HASH_BLOCK_SIZE_MSK;
+		len &= ~CESA_HASH_BLOCK_SIZE_MSK;
+	}
+
+	if (len - creq->cache_ptr)
+		sreq->offset += sg_pcopy_to_buffer(req->src, creq->src_nents,
+						   engine->sram +
+						   CESA_SA_DATA_SRAM_OFFSET +
+						   creq->cache_ptr,
+						   len - creq->cache_ptr,
+						   sreq->offset);
+
+	op = &creq->op_tmpl;
+
+	frag_mode = mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK;
+
+	if (creq->last_req && sreq->offset == req->nbytes &&
+	    creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) {
+		if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG)
+			frag_mode = CESA_SA_DESC_CFG_NOT_FRAG;
+		else if (frag_mode == CESA_SA_DESC_CFG_MID_FRAG)
+			frag_mode = CESA_SA_DESC_CFG_LAST_FRAG;
+	}
+
+	if (frag_mode == CESA_SA_DESC_CFG_NOT_FRAG ||
+	    frag_mode == CESA_SA_DESC_CFG_LAST_FRAG) {
+		if (len &&
+		    creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) {
+			mv_cesa_set_mac_op_total_len(op, creq->len);
+		} else {
+			int trailerlen = mv_cesa_ahash_pad_len(creq) + 8;
+
+			if (len + trailerlen > CESA_SA_SRAM_PAYLOAD_SIZE) {
+				len &= CESA_HASH_BLOCK_SIZE_MSK;
+				new_cache_ptr = 64 - trailerlen;
+				memcpy(creq->cache,
+				       engine->sram +
+				       CESA_SA_DATA_SRAM_OFFSET + len,
+				       new_cache_ptr);
+			} else {
+				len += mv_cesa_ahash_pad_req(creq,
+						engine->sram + len +
+						CESA_SA_DATA_SRAM_OFFSET);
+			}
+
+			if (frag_mode == CESA_SA_DESC_CFG_LAST_FRAG)
+				frag_mode = CESA_SA_DESC_CFG_MID_FRAG;
+			else
+				frag_mode = CESA_SA_DESC_CFG_FIRST_FRAG;
+		}
+	}
+
+	mv_cesa_set_mac_op_frag_len(op, len);
+	mv_cesa_update_op_cfg(op, frag_mode, CESA_SA_DESC_CFG_FRAG_MSK);
+
+	/* FIXME: only update enc_len field */
+	memcpy(engine->sram, op, sizeof(*op));
+
+	if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG)
+		mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG,
+				      CESA_SA_DESC_CFG_FRAG_MSK);
+
+	creq->cache_ptr = new_cache_ptr;
+
+	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE);
+	writel(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG);
+	writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
+}
+
+static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_ahash_std_req *sreq = &creq->req.std;
+
+	if (sreq->offset < (req->nbytes - creq->cache_ptr))
+		return -EINPROGRESS;
+
+	return 0;
+}
+
+static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_tdma_req *dreq = &creq->req.dma.base;
+
+	mv_cesa_dma_prepare(dreq, dreq->base.engine);
+}
+
+static void mv_cesa_ahash_std_prepare(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_ahash_std_req *sreq = &creq->req.std;
+	struct mv_cesa_engine *engine = sreq->base.engine;
+
+	sreq->offset = 0;
+	mv_cesa_adjust_op(engine, &creq->op_tmpl);
+	memcpy(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl));
+}
+
+static void mv_cesa_ahash_step(struct crypto_async_request *req)
+{
+	struct ahash_request *ahashreq = ahash_request_cast(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_dma_step(&creq->req.dma.base);
+	else
+		mv_cesa_ahash_std_step(ahashreq);
+}
+
+static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status)
+{
+	struct ahash_request *ahashreq = ahash_request_cast(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
+	struct mv_cesa_engine *engine = creq->req.base.engine;
+	unsigned int digsize;
+	int ret, i;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		ret = mv_cesa_dma_process(&creq->req.dma.base, status);
+	else
+		ret = mv_cesa_ahash_std_process(ahashreq, status);
+
+	if (ret == -EINPROGRESS)
+		return ret;
+
+	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
+	for (i = 0; i < digsize / 4; i++)
+		creq->state[i] = readl(engine->regs + CESA_IVDIG(i));
+
+	if (creq->cache_ptr)
+		sg_pcopy_to_buffer(ahashreq->src, creq->src_nents,
+				   creq->cache,
+				   creq->cache_ptr,
+				   ahashreq->nbytes - creq->cache_ptr);
+
+	if (creq->last_req) {
+		for (i = 0; i < digsize / 4; i++) {
+			/*
+			 * The hardware produces the MD5 digest in a
+			 * different endianness than the SHA-1 and
+			 * SHA-256 digests.
+			 */
+			if (digsize == MD5_DIGEST_SIZE)
+				creq->state[i] = cpu_to_le32(creq->state[i]);
+			else
+				creq->state[i] = cpu_to_be32(creq->state[i]);
+		}
+
+		memcpy(ahashreq->result, creq->state, digsize);
+	}
+
+	return ret;
+}
+
+static void mv_cesa_ahash_prepare(struct crypto_async_request *req,
+				  struct mv_cesa_engine *engine)
+{
+	struct ahash_request *ahashreq = ahash_request_cast(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
+	unsigned int digsize;
+	int i;
+
+	creq->req.base.engine = engine;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		mv_cesa_ahash_dma_prepare(ahashreq);
+	else
+		mv_cesa_ahash_std_prepare(ahashreq);
+
+	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
+	for (i = 0; i < digsize / 4; i++)
+		writel(creq->state[i],
+		       engine->regs + CESA_IVDIG(i));
+}
+
+static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req)
+{
+	struct ahash_request *ahashreq = ahash_request_cast(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq);
+
+	if (creq->last_req)
+		mv_cesa_ahash_last_cleanup(ahashreq);
+
+	mv_cesa_ahash_cleanup(ahashreq);
+}
+
+static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = {
+	.step = mv_cesa_ahash_step,
+	.process = mv_cesa_ahash_process,
+	.prepare = mv_cesa_ahash_prepare,
+	.cleanup = mv_cesa_ahash_req_cleanup,
+};
+
+static int mv_cesa_ahash_init(struct ahash_request *req,
+			      struct mv_cesa_op_ctx *tmpl)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+
+	memset(creq, 0, sizeof(*creq));
+	mv_cesa_update_op_cfg(tmpl,
+			      CESA_SA_DESC_CFG_OP_MAC_ONLY |
+			      CESA_SA_DESC_CFG_FIRST_FRAG,
+			      CESA_SA_DESC_CFG_OP_MSK |
+			      CESA_SA_DESC_CFG_FRAG_MSK);
+	mv_cesa_set_mac_op_total_len(tmpl, 0);
+	mv_cesa_set_mac_op_frag_len(tmpl, 0);
+	creq->op_tmpl = *tmpl;
+	creq->len = 0;
+
+	return 0;
+}
+
+static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
+{
+	struct mv_cesa_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->base.ops = &mv_cesa_ahash_req_ops;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct mv_cesa_ahash_req));
+	return 0;
+}
+
+static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	int ret;
+
+	if (((creq->cache_ptr + req->nbytes) & CESA_HASH_BLOCK_SIZE_MSK) &&
+	    !creq->last_req) {
+		ret = mv_cesa_ahash_alloc_cache(req);
+		if (ret)
+			return ret;
+	}
+
+	if (creq->cache_ptr + req->nbytes < 64 && !creq->last_req) {
+		*cached = true;
+
+		if (!req->nbytes)
+			return 0;
+
+		sg_pcopy_to_buffer(req->src, creq->src_nents,
+				   creq->cache + creq->cache_ptr,
+				   req->nbytes, 0);
+
+		creq->cache_ptr += req->nbytes;
+	}
+
+	return 0;
+}
+
+static struct mv_cesa_op_ctx *
+mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
+			    struct mv_cesa_ahash_dma_iter *dma_iter,
+			    struct mv_cesa_ahash_req *creq,
+			    gfp_t flags)
+{
+	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
+	struct mv_cesa_op_ctx *op = NULL;
+	int ret;
+
+	if (!creq->cache_ptr)
+		return NULL;
+
+	ret = mv_cesa_dma_add_data_transfer(chain,
+					    CESA_SA_DATA_SRAM_OFFSET,
+					    ahashdreq->cache_dma,
+					    creq->cache_ptr,
+					    CESA_TDMA_DST_IN_SRAM,
+					    flags);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (!dma_iter->base.op_len) {
+		op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags);
+		if (IS_ERR(op))
+			return op;
+
+		mv_cesa_set_mac_op_frag_len(op, creq->cache_ptr);
+
+		/* Add dummy desc to launch crypto operation */
+		ret = mv_cesa_dma_add_dummy_launch(chain, flags);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	return op;
+}
+
+static struct mv_cesa_op_ctx *
+mv_cesa_ahash_dma_add_data(struct mv_cesa_tdma_chain *chain,
+			   struct mv_cesa_ahash_dma_iter *dma_iter,
+			   struct mv_cesa_ahash_req *creq,
+			   gfp_t flags)
+{
+	struct mv_cesa_op_ctx *op;
+	int ret;
+
+	op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags);
+	if (IS_ERR(op))
+		return op;
+
+	mv_cesa_set_mac_op_frag_len(op, dma_iter->base.op_len);
+
+	if ((mv_cesa_get_op_cfg(&creq->op_tmpl) & CESA_SA_DESC_CFG_FRAG_MSK) ==
+	    CESA_SA_DESC_CFG_FIRST_FRAG)
+		mv_cesa_update_op_cfg(&creq->op_tmpl,
+				      CESA_SA_DESC_CFG_MID_FRAG,
+				      CESA_SA_DESC_CFG_FRAG_MSK);
+
+	/* Add input transfers */
+	ret = mv_cesa_dma_add_op_transfers(chain, &dma_iter->base,
+					   &dma_iter->src, flags);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Add dummy desc to launch crypto operation */
+	ret = mv_cesa_dma_add_dummy_launch(chain, flags);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return op;
+}
+
+static struct mv_cesa_op_ctx *
+mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
+			   struct mv_cesa_ahash_dma_iter *dma_iter,
+			   struct mv_cesa_ahash_req *creq,
+			   struct mv_cesa_op_ctx *op,
+			   gfp_t flags)
+{
+	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
+	unsigned int len, trailerlen, padoff = 0;
+	int ret;
+
+	if (!creq->last_req)
+		return op;
+
+	if (op && creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) {
+		u32 frag = CESA_SA_DESC_CFG_NOT_FRAG;
+
+		if ((mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK) !=
+		    CESA_SA_DESC_CFG_FIRST_FRAG)
+			frag = CESA_SA_DESC_CFG_LAST_FRAG;
+
+		mv_cesa_update_op_cfg(op, frag, CESA_SA_DESC_CFG_FRAG_MSK);
+
+		return op;
+	}
+
+	ret = mv_cesa_ahash_dma_alloc_padding(ahashdreq, flags);
+	if (ret)
+		return ERR_PTR(ret);
+
+	trailerlen = mv_cesa_ahash_pad_req(creq, ahashdreq->padding);
+
+	if (op) {
+		len = min(CESA_SA_SRAM_PAYLOAD_SIZE - dma_iter->base.op_len,
+			  trailerlen);
+		if (len) {
+			ret = mv_cesa_dma_add_data_transfer(chain,
+						CESA_SA_DATA_SRAM_OFFSET +
+						dma_iter->base.op_len,
+						ahashdreq->padding_dma,
+						len, CESA_TDMA_DST_IN_SRAM,
+						flags);
+			if (ret)
+				return ERR_PTR(ret);
+
+			mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG,
+					      CESA_SA_DESC_CFG_FRAG_MSK);
+			mv_cesa_set_mac_op_frag_len(op,
+					dma_iter->base.op_len + len);
+			padoff += len;
+		}
+	}
+
+	if (padoff >= trailerlen)
+		return op;
+
+	if ((mv_cesa_get_op_cfg(&creq->op_tmpl) & CESA_SA_DESC_CFG_FRAG_MSK) !=
+	    CESA_SA_DESC_CFG_FIRST_FRAG)
+		mv_cesa_update_op_cfg(&creq->op_tmpl,
+				      CESA_SA_DESC_CFG_MID_FRAG,
+				      CESA_SA_DESC_CFG_FRAG_MSK);
+
+	op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags);
+	if (IS_ERR(op))
+		return op;
+
+	mv_cesa_set_mac_op_frag_len(op, trailerlen - padoff);
+
+	ret = mv_cesa_dma_add_data_transfer(chain,
+					    CESA_SA_DATA_SRAM_OFFSET,
+					    ahashdreq->padding_dma +
+					    padoff,
+					    trailerlen - padoff,
+					    CESA_TDMA_DST_IN_SRAM,
+					    flags);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Add dummy desc to launch crypto operation */
+	ret = mv_cesa_dma_add_dummy_launch(chain, flags);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return op;
+}
+
+static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		      GFP_KERNEL : GFP_ATOMIC;
+	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
+	struct mv_cesa_tdma_req *dreq = &ahashdreq->base;
+	struct mv_cesa_tdma_chain chain;
+	struct mv_cesa_ahash_dma_iter iter;
+	struct mv_cesa_op_ctx *op = NULL;
+	int ret;
+
+	dreq->chain.first = NULL;
+	dreq->chain.last = NULL;
+
+	if (creq->src_nents) {
+		ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents,
+				 DMA_TO_DEVICE);
+		if (!ret) {
+			ret = -ENOMEM;
+			goto err;
+		}
+	}
+
+	mv_cesa_tdma_desc_iter_init(&chain);
+	mv_cesa_ahash_req_iter_init(&iter, req);
+
+	op = mv_cesa_ahash_dma_add_cache(&chain, &iter,
+					 creq, flags);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto err_free_tdma;
+	}
+
+	do {
+		if (!iter.base.op_len)
+			break;
+
+		op = mv_cesa_ahash_dma_add_data(&chain, &iter,
+						creq, flags);
+		if (IS_ERR(op)) {
+			ret = PTR_ERR(op);
+			goto err_free_tdma;
+		}
+	} while (mv_cesa_ahash_req_iter_next_op(&iter));
+
+	op = mv_cesa_ahash_dma_last_req(&chain, &iter, creq, op, flags);
+	if (IS_ERR(op)) {
+		ret = PTR_ERR(op);
+		goto err_free_tdma;
+	}
+
+	if (op) {
+		/* Add dummy desc to wait for crypto operation end */
+		ret = mv_cesa_dma_add_dummy_end(&chain, flags);
+		if (ret)
+			goto err_free_tdma;
+	}
+
+	if (!creq->last_req)
+		creq->cache_ptr = req->nbytes + creq->cache_ptr -
+				  iter.base.len;
+	else
+		creq->cache_ptr = 0;
+
+	dreq->chain = chain;
+
+	return 0;
+
+err_free_tdma:
+	mv_cesa_dma_cleanup(dreq);
+	dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE);
+
+err:
+	mv_cesa_ahash_last_cleanup(req);
+
+	return ret;
+}
+
+static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	int ret;
+
+	if (cesa_dev->caps->has_tdma)
+		creq->req.base.type = CESA_DMA_REQ;
+	else
+		creq->req.base.type = CESA_STD_REQ;
+
+	creq->src_nents = sg_nents_for_len(req->src, req->nbytes);
+
+	ret = mv_cesa_ahash_cache_req(req, cached);
+	if (ret)
+		return ret;
+
+	if (*cached)
+		return 0;
+
+	if (creq->req.base.type == CESA_DMA_REQ)
+		ret = mv_cesa_ahash_dma_req_init(req);
+
+	return ret;
+}
+
+static int mv_cesa_ahash_update(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	bool cached = false;
+	int ret;
+
+	creq->len += req->nbytes;
+	ret = mv_cesa_ahash_req_init(req, &cached);
+	if (ret)
+		return ret;
+
+	if (cached)
+		return 0;
+
+	ret = mv_cesa_queue_req(&req->base);
+	if (ret && ret != -EINPROGRESS) {
+		mv_cesa_ahash_cleanup(req);
+		return ret;
+	}
+
+	return ret;
+}
+
+static int mv_cesa_ahash_final(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl;
+	bool cached = false;
+	int ret;
+
+	mv_cesa_set_mac_op_total_len(tmpl, creq->len);
+	creq->last_req = true;
+	req->nbytes = 0;
+
+	ret = mv_cesa_ahash_req_init(req, &cached);
+	if (ret)
+		return ret;
+
+	if (cached)
+		return 0;
+
+	ret = mv_cesa_queue_req(&req->base);
+	if (ret && ret != -EINPROGRESS)
+		mv_cesa_ahash_cleanup(req);
+
+	return ret;
+}
+
+static int mv_cesa_ahash_finup(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl;
+	bool cached = false;
+	int ret;
+
+	creq->len += req->nbytes;
+	mv_cesa_set_mac_op_total_len(tmpl, creq->len);
+	creq->last_req = true;
+
+	ret = mv_cesa_ahash_req_init(req, &cached);
+	if (ret)
+		return ret;
+
+	if (cached)
+		return 0;
+
+	ret = mv_cesa_queue_req(&req->base);
+	if (ret && ret != -EINPROGRESS)
+		mv_cesa_ahash_cleanup(req);
+
+	return ret;
+}
+
+static int mv_cesa_md5_init(struct ahash_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_md5_export(struct ahash_request *req, void *out)
+{
+	struct md5_state *out_state = out;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int digsize = crypto_ahash_digestsize(ahash);
+
+	out_state->byte_count = creq->len;
+	memcpy(out_state->hash, creq->state, digsize);
+	memset(out_state->block, 0, sizeof(out_state->block));
+	if (creq->cache)
+		memcpy(out_state->block, creq->cache, creq->cache_ptr);
+
+	return 0;
+}
+
+static int mv_cesa_md5_import(struct ahash_request *req, const void *in)
+{
+	const struct md5_state *in_state = in;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int cache_ptr;
+	int ret;
+
+	creq->len = in_state->byte_count;
+	memcpy(creq->state, in_state->hash, digsize);
+	creq->cache_ptr = 0;
+
+	cache_ptr = creq->len % sizeof(in_state->block);
+	if (!cache_ptr)
+		return 0;
+
+	ret = mv_cesa_ahash_alloc_cache(req);
+	if (ret)
+		return ret;
+
+	memcpy(creq->cache, in_state->block, cache_ptr);
+	creq->cache_ptr = cache_ptr;
+
+	return 0;
+}
+
+static int mv_cesa_md5_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = mv_cesa_md5_init(req);
+	if (ret)
+		return ret;
+
+	return mv_cesa_ahash_finup(req);
+}
+
+struct ahash_alg mv_md5_alg = {
+	.init = mv_cesa_md5_init,
+	.update = mv_cesa_ahash_update,
+	.final = mv_cesa_ahash_final,
+	.finup = mv_cesa_ahash_finup,
+	.digest = mv_cesa_md5_digest,
+	.export = mv_cesa_md5_export,
+	.import = mv_cesa_md5_import,
+	.halg = {
+		.digestsize = MD5_DIGEST_SIZE,
+		.base = {
+			.cra_name = "md5",
+			.cra_driver_name = "mv-md5",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
+			.cra_init = mv_cesa_ahash_cra_init,
+			.cra_module = THIS_MODULE,
+		 }
+	}
+};
+
+static int mv_cesa_sha1_init(struct ahash_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_sha1_export(struct ahash_request *req, void *out)
+{
+	struct sha1_state *out_state = out;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int digsize = crypto_ahash_digestsize(ahash);
+
+	out_state->count = creq->len;
+	memcpy(out_state->state, creq->state, digsize);
+	memset(out_state->buffer, 0, sizeof(out_state->buffer));
+	if (creq->cache)
+		memcpy(out_state->buffer, creq->cache, creq->cache_ptr);
+
+	return 0;
+}
+
+static int mv_cesa_sha1_import(struct ahash_request *req, const void *in)
+{
+	const struct sha1_state *in_state = in;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int cache_ptr;
+	int ret;
+
+	creq->len = in_state->count;
+	memcpy(creq->state, in_state->state, digsize);
+	creq->cache_ptr = 0;
+
+	cache_ptr = creq->len % SHA1_BLOCK_SIZE;
+	if (!cache_ptr)
+		return 0;
+
+	ret = mv_cesa_ahash_alloc_cache(req);
+	if (ret)
+		return ret;
+
+	memcpy(creq->cache, in_state->buffer, cache_ptr);
+	creq->cache_ptr = cache_ptr;
+
+	return 0;
+}
+
+static int mv_cesa_sha1_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = mv_cesa_sha1_init(req);
+	if (ret)
+		return ret;
+
+	return mv_cesa_ahash_finup(req);
+}
+
+struct ahash_alg mv_sha1_alg = {
+	.init = mv_cesa_sha1_init,
+	.update = mv_cesa_ahash_update,
+	.final = mv_cesa_ahash_final,
+	.finup = mv_cesa_ahash_finup,
+	.digest = mv_cesa_sha1_digest,
+	.export = mv_cesa_sha1_export,
+	.import = mv_cesa_sha1_import,
+	.halg = {
+		.digestsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "sha1",
+			.cra_driver_name = "mv-sha1",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SHA1_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
+			.cra_init = mv_cesa_ahash_cra_init,
+			.cra_module = THIS_MODULE,
+		 }
+	}
+};
+
+static int mv_cesa_sha256_init(struct ahash_request *req)
+{
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_sha256_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = mv_cesa_sha256_init(req);
+	if (ret)
+		return ret;
+
+	return mv_cesa_ahash_finup(req);
+}
+
+static int mv_cesa_sha256_export(struct ahash_request *req, void *out)
+{
+	struct sha256_state *out_state = out;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int digsize = crypto_ahash_digestsize(ahash);
+
+	out_state->count = creq->len;
+	memcpy(out_state->state, creq->state, digsize);
+	memset(out_state->buf, 0, sizeof(out_state->buf));
+	if (creq->cache)
+		memcpy(out_state->buf, creq->cache, creq->cache_ptr);
+
+	return 0;
+}
+
+static int mv_cesa_sha256_import(struct ahash_request *req, const void *in)
+{
+	const struct sha256_state *in_state = in;
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	unsigned int digsize = crypto_ahash_digestsize(ahash);
+	unsigned int cache_ptr;
+	int ret;
+
+	creq->len = in_state->count;
+	memcpy(creq->state, in_state->state, digsize);
+	creq->cache_ptr = 0;
+
+	cache_ptr = creq->len % SHA256_BLOCK_SIZE;
+	if (!cache_ptr)
+		return 0;
+
+	ret = mv_cesa_ahash_alloc_cache(req);
+	if (ret)
+		return ret;
+
+	memcpy(creq->cache, in_state->buf, cache_ptr);
+	creq->cache_ptr = cache_ptr;
+
+	return 0;
+}
+
+struct ahash_alg mv_sha256_alg = {
+	.init = mv_cesa_sha256_init,
+	.update = mv_cesa_ahash_update,
+	.final = mv_cesa_ahash_final,
+	.finup = mv_cesa_ahash_finup,
+	.digest = mv_cesa_sha256_digest,
+	.export = mv_cesa_sha256_export,
+	.import = mv_cesa_sha256_import,
+	.halg = {
+		.digestsize = SHA256_DIGEST_SIZE,
+		.base = {
+			.cra_name = "sha256",
+			.cra_driver_name = "mv-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct mv_cesa_hash_ctx),
+			.cra_init = mv_cesa_ahash_cra_init,
+			.cra_module = THIS_MODULE,
+		 }
+	}
+};
+
+struct mv_cesa_ahash_result {
+	struct completion completion;
+	int error;
+};
+
+static void mv_cesa_hmac_ahash_complete(struct crypto_async_request *req,
+					int error)
+{
+	struct mv_cesa_ahash_result *result = req->data;
+
+	if (error == -EINPROGRESS)
+		return;
+
+	result->error = error;
+	complete(&result->completion);
+}
+
+static int mv_cesa_ahmac_iv_state_init(struct ahash_request *req, u8 *pad,
+				       void *state, unsigned int blocksize)
+{
+	struct mv_cesa_ahash_result result;
+	struct scatterlist sg;
+	int ret;
+
+	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   mv_cesa_hmac_ahash_complete, &result);
+	sg_init_one(&sg, pad, blocksize);
+	ahash_request_set_crypt(req, &sg, pad, blocksize);
+	init_completion(&result.completion);
+
+	ret = crypto_ahash_init(req);
+	if (ret)
+		return ret;
+
+	ret = crypto_ahash_update(req);
+	if (ret && ret != -EINPROGRESS)
+		return ret;
+
+	wait_for_completion_interruptible(&result.completion);
+	if (result.error)
+		return result.error;
+
+	ret = crypto_ahash_export(req, state);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_pad_init(struct ahash_request *req,
+				  const u8 *key, unsigned int keylen,
+				  u8 *ipad, u8 *opad,
+				  unsigned int blocksize)
+{
+	struct mv_cesa_ahash_result result;
+	struct scatterlist sg;
+	int ret;
+	int i;
+
+	if (keylen <= blocksize) {
+		memcpy(ipad, key, keylen);
+	} else {
+		u8 *keydup = kmemdup(key, keylen, GFP_KERNEL);
+
+		if (!keydup)
+			return -ENOMEM;
+
+		ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					   mv_cesa_hmac_ahash_complete,
+					   &result);
+		sg_init_one(&sg, keydup, keylen);
+		ahash_request_set_crypt(req, &sg, ipad, keylen);
+		init_completion(&result.completion);
+
+		ret = crypto_ahash_digest(req);
+		if (ret == -EINPROGRESS) {
+			wait_for_completion_interruptible(&result.completion);
+			ret = result.error;
+		}
+
+		/* Zero the key copy to avoid leaking key material. */
+		memset(keydup, 0, keylen);
+		kfree(keydup);
+
+		if (ret)
+			return ret;
+
+		keylen = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
+	}
+
+	memset(ipad + keylen, 0, blocksize - keylen);
+	memcpy(opad, ipad, blocksize);
+
+	for (i = 0; i < blocksize; i++) {
+		ipad[i] ^= 0x36;
+		opad[i] ^= 0x5c;
+	}
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_setkey(const char *hash_alg_name,
+				const u8 *key, unsigned int keylen,
+				void *istate, void *ostate)
+{
+	struct ahash_request *req;
+	struct crypto_ahash *tfm;
+	unsigned int blocksize;
+	u8 *ipad = NULL;
+	u8 *opad;
+	int ret;
+
+	tfm = crypto_alloc_ahash(hash_alg_name, CRYPTO_ALG_TYPE_AHASH,
+				 CRYPTO_ALG_TYPE_AHASH_MASK);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	req = ahash_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		ret = -ENOMEM;
+		goto free_ahash;
+	}
+
+	crypto_ahash_clear_flags(tfm, ~0);
+
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	ipad = kzalloc(2 * blocksize, GFP_KERNEL);
+	if (!ipad) {
+		ret = -ENOMEM;
+		goto free_req;
+	}
+
+	opad = ipad + blocksize;
+
+	ret = mv_cesa_ahmac_pad_init(req, key, keylen, ipad, opad, blocksize);
+	if (ret)
+		goto free_ipad;
+
+	ret = mv_cesa_ahmac_iv_state_init(req, ipad, istate, blocksize);
+	if (ret)
+		goto free_ipad;
+
+	ret = mv_cesa_ahmac_iv_state_init(req, opad, ostate, blocksize);
+
+free_ipad:
+	kfree(ipad);
+free_req:
+	ahash_request_free(req);
+free_ahash:
+	crypto_free_ahash(tfm);
+
+	return ret;
+}
+
+static int mv_cesa_ahmac_cra_init(struct crypto_tfm *tfm)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->base.ops = &mv_cesa_ahash_req_ops;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct mv_cesa_ahash_req));
+	return 0;
+}
+
+static int mv_cesa_ahmac_md5_init(struct ahash_request *req)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_MD5);
+	memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv));
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_md5_setkey(struct crypto_ahash *tfm, const u8 *key,
+				    unsigned int keylen)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct md5_state istate, ostate;
+	int ret, i;
+
+	ret = mv_cesa_ahmac_setkey("mv-md5", key, keylen, &istate, &ostate);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(istate.hash); i++)
+		ctx->iv[i] = be32_to_cpu(istate.hash[i]);
+
+	for (i = 0; i < ARRAY_SIZE(ostate.hash); i++)
+		ctx->iv[i + 8] = be32_to_cpu(ostate.hash[i]);
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_md5_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = mv_cesa_ahmac_md5_init(req);
+	if (ret)
+		return ret;
+
+	return mv_cesa_ahash_finup(req);
+}
+
+struct ahash_alg mv_ahmac_md5_alg = {
+	.init = mv_cesa_ahmac_md5_init,
+	.update = mv_cesa_ahash_update,
+	.final = mv_cesa_ahash_final,
+	.finup = mv_cesa_ahash_finup,
+	.digest = mv_cesa_ahmac_md5_digest,
+	.setkey = mv_cesa_ahmac_md5_setkey,
+	.export = mv_cesa_md5_export,
+	.import = mv_cesa_md5_import,
+	.halg = {
+		.digestsize = MD5_DIGEST_SIZE,
+		.statesize = sizeof(struct md5_state),
+		.base = {
+			.cra_name = "hmac(md5)",
+			.cra_driver_name = "mv-hmac-md5",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
+			.cra_init = mv_cesa_ahmac_cra_init,
+			.cra_module = THIS_MODULE,
+		 }
+	}
+};
+
+static int mv_cesa_ahmac_sha1_init(struct ahash_request *req)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA1);
+	memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv));
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct sha1_state istate, ostate;
+	int ret, i;
+
+	ret = mv_cesa_ahmac_setkey("mv-sha1", key, keylen, &istate, &ostate);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(istate.state); i++)
+		ctx->iv[i] = be32_to_cpu(istate.state[i]);
+
+	for (i = 0; i < ARRAY_SIZE(ostate.state); i++)
+		ctx->iv[i + 8] = be32_to_cpu(ostate.state[i]);
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_sha1_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = mv_cesa_ahmac_sha1_init(req);
+	if (ret)
+		return ret;
+
+	return mv_cesa_ahash_finup(req);
+}
+
+struct ahash_alg mv_ahmac_sha1_alg = {
+	.init = mv_cesa_ahmac_sha1_init,
+	.update = mv_cesa_ahash_update,
+	.final = mv_cesa_ahash_final,
+	.finup = mv_cesa_ahash_finup,
+	.digest = mv_cesa_ahmac_sha1_digest,
+	.setkey = mv_cesa_ahmac_sha1_setkey,
+	.export = mv_cesa_sha1_export,
+	.import = mv_cesa_sha1_import,
+	.halg = {
+		.digestsize = SHA1_DIGEST_SIZE,
+		.statesize = sizeof(struct sha1_state),
+		.base = {
+			.cra_name = "hmac(sha1)",
+			.cra_driver_name = "mv-hmac-sha1",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SHA1_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
+			.cra_init = mv_cesa_ahmac_cra_init,
+			.cra_module = THIS_MODULE,
+		 }
+	}
+};
+
+static int mv_cesa_ahmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key,
+				       unsigned int keylen)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct sha256_state istate, ostate;
+	int ret, i;
+
+	ret = mv_cesa_ahmac_setkey("mv-sha256", key, keylen, &istate, &ostate);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(istate.state); i++)
+		ctx->iv[i] = be32_to_cpu(istate.state[i]);
+
+	for (i = 0; i < ARRAY_SIZE(ostate.state); i++)
+		ctx->iv[i + 8] = be32_to_cpu(ostate.state[i]);
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_sha256_init(struct ahash_request *req)
+{
+	struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct mv_cesa_op_ctx tmpl;
+
+	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA256);
+	memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv));
+
+	mv_cesa_ahash_init(req, &tmpl);
+
+	return 0;
+}
+
+static int mv_cesa_ahmac_sha256_digest(struct ahash_request *req)
+{
+	int ret;
+
+	ret = mv_cesa_ahmac_sha256_init(req);
+	if (ret)
+		return ret;
+
+	return mv_cesa_ahash_finup(req);
+}
+
+struct ahash_alg mv_ahmac_sha256_alg = {
+	.init = mv_cesa_ahmac_sha256_init,
+	.update = mv_cesa_ahash_update,
+	.final = mv_cesa_ahash_final,
+	.finup = mv_cesa_ahash_finup,
+	.digest = mv_cesa_ahmac_sha256_digest,
+	.setkey = mv_cesa_ahmac_sha256_setkey,
+	.export = mv_cesa_sha256_export,
+	.import = mv_cesa_sha256_import,
+	.halg = {
+		.digestsize = SHA256_DIGEST_SIZE,
+		.statesize = sizeof(struct sha256_state),
+		.base = {
+			.cra_name = "hmac(sha256)",
+			.cra_driver_name = "mv-hmac-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx),
+			.cra_init = mv_cesa_ahmac_cra_init,
+			.cra_module = THIS_MODULE,
+		 }
+	}
+};

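mv_cesa_ahmac_pad_init() above is a direct implementation of the RFC 2104 key schedule: a key longer than one block is first hashed down to the digest size, the (possibly shortened) key is zero-padded to a full block, and the inner and outer pads are derived by XORing every byte with 0x36 and 0x5c respectively; the partially hashed ipad/opad states are then loaded into the engine as IVs by the hmac init routines. A condensed user-space sketch of the pad derivation, where hash_digest() is an assumed helper and not a real API:

#include <string.h>

/* Assumed helper: hashes @len bytes of @in into @out (digest size). */
void hash_digest(const unsigned char *in, size_t len, unsigned char *out);

void hmac_pads(const unsigned char *key, size_t keylen,
	       unsigned char *ipad, unsigned char *opad,
	       size_t blocksize, size_t digestsize)
{
	unsigned char tmp[64];	/* big enough for MD5/SHA-1/SHA-256 */
	size_t i;

	if (keylen > blocksize) {
		/* Long keys are replaced by their digest. */
		hash_digest(key, keylen, tmp);
		key = tmp;
		keylen = digestsize;
	}

	memcpy(ipad, key, keylen);
	memset(ipad + keylen, 0, blocksize - keylen);
	memcpy(opad, ipad, blocksize);

	for (i = 0; i < blocksize; i++) {
		ipad[i] ^= 0x36;	/* RFC 2104 inner pad constant */
		opad[i] ^= 0x5c;	/* RFC 2104 outer pad constant */
	}
}
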
+ 224 - 0
drivers/crypto/marvell/tdma.c

@@ -0,0 +1,224 @@
+/*
+ * Provide TDMA helper functions used by cipher and hash algorithm
+ * implementations.
+ *
+ * Author: Boris Brezillon <boris.brezillon@free-electrons.com>
+ * Author: Arnaud Ebalard <arno@natisbad.org>
+ *
+ * This work is based on an initial version written by
+ * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc >
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "cesa.h"
+
+bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter,
+					struct mv_cesa_sg_dma_iter *sgiter,
+					unsigned int len)
+{
+	if (!sgiter->sg)
+		return false;
+
+	sgiter->op_offset += len;
+	sgiter->offset += len;
+	if (sgiter->offset == sg_dma_len(sgiter->sg)) {
+		if (sg_is_last(sgiter->sg))
+			return false;
+		sgiter->offset = 0;
+		sgiter->sg = sg_next(sgiter->sg);
+	}
+
+	if (sgiter->op_offset == iter->op_len)
+		return false;
+
+	return true;
+}
+
+void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq)
+{
+	struct mv_cesa_engine *engine = dreq->base.engine;
+
+	writel(0, engine->regs + CESA_SA_CFG);
+
+	mv_cesa_set_int_mask(engine, CESA_SA_INT_ACC0_IDMA_DONE);
+	writel(CESA_TDMA_DST_BURST_128B | CESA_TDMA_SRC_BURST_128B |
+	       CESA_TDMA_NO_BYTE_SWAP | CESA_TDMA_EN,
+	       engine->regs + CESA_TDMA_CONTROL);
+
+	writel(CESA_SA_CFG_ACT_CH0_IDMA | CESA_SA_CFG_MULTI_PKT |
+	       CESA_SA_CFG_CH0_W_IDMA | CESA_SA_CFG_PARA_DIS,
+	       engine->regs + CESA_SA_CFG);
+	writel(dreq->chain.first->cur_dma,
+	       engine->regs + CESA_TDMA_NEXT_ADDR);
+	writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD);
+}
+
+void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq)
+{
+	struct mv_cesa_tdma_desc *tdma;
+
+	for (tdma = dreq->chain.first; tdma;) {
+		struct mv_cesa_tdma_desc *old_tdma = tdma;
+
+		if (tdma->flags & CESA_TDMA_OP)
+			dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
+				      le32_to_cpu(tdma->src));
+
+		tdma = tdma->next;
+		dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma,
+			      le32_to_cpu(old_tdma->cur_dma));
+	}
+
+	dreq->chain.first = NULL;
+	dreq->chain.last = NULL;
+}
+
+void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq,
+			 struct mv_cesa_engine *engine)
+{
+	struct mv_cesa_tdma_desc *tdma;
+
+	for (tdma = dreq->chain.first; tdma; tdma = tdma->next) {
+		if (tdma->flags & CESA_TDMA_DST_IN_SRAM)
+			tdma->dst = cpu_to_le32(tdma->dst + engine->sram_dma);
+
+		if (tdma->flags & CESA_TDMA_SRC_IN_SRAM)
+			tdma->src = cpu_to_le32(tdma->src + engine->sram_dma);
+
+		if (tdma->flags & CESA_TDMA_OP)
+			mv_cesa_adjust_op(engine, tdma->op);
+	}
+}
+
+static struct mv_cesa_tdma_desc *
+mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
+{
+	struct mv_cesa_tdma_desc *new_tdma = NULL;
+	dma_addr_t dma_handle;
+
+	new_tdma = dma_pool_alloc(cesa_dev->dma->tdma_desc_pool, flags,
+				  &dma_handle);
+	if (!new_tdma)
+		return ERR_PTR(-ENOMEM);
+
+	memset(new_tdma, 0, sizeof(*new_tdma));
+	new_tdma->cur_dma = cpu_to_le32(dma_handle);
+	if (chain->last) {
+		chain->last->next_dma = new_tdma->cur_dma;
+		chain->last->next = new_tdma;
+	} else {
+		chain->first = new_tdma;
+	}
+
+	chain->last = new_tdma;
+
+	return new_tdma;
+}
+
+struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
+					const struct mv_cesa_op_ctx *op_templ,
+					bool skip_ctx,
+					gfp_t flags)
+{
+	struct mv_cesa_tdma_desc *tdma;
+	struct mv_cesa_op_ctx *op;
+	dma_addr_t dma_handle;
+
+	tdma = mv_cesa_dma_add_desc(chain, flags);
+	if (IS_ERR(tdma))
+		return ERR_CAST(tdma);
+
+	op = dma_pool_alloc(cesa_dev->dma->op_pool, flags, &dma_handle);
+	if (!op)
+		return ERR_PTR(-ENOMEM);
+
+	*op = *op_templ;
+
+	tdma = chain->last;
+	tdma->op = op;
+	tdma->byte_cnt = (skip_ctx ? sizeof(op->desc) : sizeof(*op)) | BIT(31);
+	tdma->src = dma_handle;
+	tdma->flags = CESA_TDMA_DST_IN_SRAM | CESA_TDMA_OP;
+
+	return op;
+}
+
+int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain,
+				  dma_addr_t dst, dma_addr_t src, u32 size,
+				  u32 flags, gfp_t gfp_flags)
+{
+	struct mv_cesa_tdma_desc *tdma;
+
+	tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
+	if (IS_ERR(tdma))
+		return PTR_ERR(tdma);
+
+	tdma->byte_cnt = size | BIT(31);
+	tdma->src = src;
+	tdma->dst = dst;
+
+	flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
+	tdma->flags = flags | CESA_TDMA_DATA;
+
+	return 0;
+}
+
+int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain,
+				 u32 flags)
+{
+	struct mv_cesa_tdma_desc *tdma;
+
+	tdma = mv_cesa_dma_add_desc(chain, flags);
+	if (IS_ERR(tdma))
+		return PTR_ERR(tdma);
+
+	return 0;
+}
+
+int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags)
+{
+	struct mv_cesa_tdma_desc *tdma;
+
+	tdma = mv_cesa_dma_add_desc(chain, flags);
+	if (IS_ERR(tdma))
+		return PTR_ERR(tdma);
+
+	tdma->byte_cnt = BIT(31);
+
+	return 0;
+}
+
+int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain,
+				 struct mv_cesa_dma_iter *dma_iter,
+				 struct mv_cesa_sg_dma_iter *sgiter,
+				 gfp_t gfp_flags)
+{
+	u32 flags = sgiter->dir == DMA_TO_DEVICE ?
+		    CESA_TDMA_DST_IN_SRAM : CESA_TDMA_SRC_IN_SRAM;
+	unsigned int len;
+
+	do {
+		dma_addr_t dst, src;
+		int ret;
+
+		len = mv_cesa_req_dma_iter_transfer_len(dma_iter, sgiter);
+		if (sgiter->dir == DMA_TO_DEVICE) {
+			dst = CESA_SA_DATA_SRAM_OFFSET + sgiter->op_offset;
+			src = sg_dma_address(sgiter->sg) + sgiter->offset;
+		} else {
+			dst = sg_dma_address(sgiter->sg) + sgiter->offset;
+			src = CESA_SA_DATA_SRAM_OFFSET + sgiter->op_offset;
+		}
+
+		ret = mv_cesa_dma_add_data_transfer(chain, dst, src, len,
+						    flags, gfp_flags);
+		if (ret)
+			return ret;
+
+	} while (mv_cesa_req_dma_iter_next_transfer(dma_iter, sgiter, len));
+
+	return 0;
+}

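tdma.c manages each request as a singly linked chain of hardware descriptors. Every descriptor records the bus address it was allocated at (cur_dma) so that the previous descriptor's next_dma field can be pointed at it, and a parallel CPU-side next pointer lets mv_cesa_dma_cleanup() walk and free the list. A host-side sketch of the append pattern, with malloc()/calloc() standing in for dma_pool_alloc() and an identity bus mapping assumed for illustration:

#include <stdint.h>
#include <stdlib.h>

struct tdma_desc {
	uint32_t byte_cnt;
	uint32_t src;
	uint32_t dst;
	uint32_t next_dma;	/* bus address of the next descriptor */
	uint32_t cur_dma;	/* bus address of this descriptor */
	struct tdma_desc *next;	/* CPU-side mirror of next_dma */
};

struct tdma_chain {
	struct tdma_desc *first;
	struct tdma_desc *last;
};

/* Assumption for the sketch: bus address == CPU address. */
static uint32_t fake_bus_addr(void *p)
{
	return (uint32_t)(uintptr_t)p;
}

static struct tdma_desc *chain_add_desc(struct tdma_chain *chain)
{
	struct tdma_desc *d = calloc(1, sizeof(*d));

	if (!d)
		return NULL;

	d->cur_dma = fake_bus_addr(d);
	if (chain->last) {
		/* Link the previous descriptor to the new one. */
		chain->last->next_dma = d->cur_dma;
		chain->last->next = d;
	} else {
		chain->first = d;
	}
	chain->last = d;

	return d;
}

In the real driver the descriptors come from a dma_pool so the engine can fetch them by bus address, and the code above sets BIT(31) in byte_cnt on every descriptor that actually moves data.
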
+ 50 - 23
drivers/crypto/mv_cesa.c

@@ -9,6 +9,7 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <linux/crypto.h>
+#include <linux/genalloc.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kthread.h>
@@ -29,6 +30,8 @@
 #define MAX_HW_HASH_SIZE	0xFFFF
 #define MV_CESA_EXPIRE		500 /* msec */
 
+#define MV_CESA_DEFAULT_SRAM_SIZE	2048
+
 /*
  * STM:
  *   /---------------------------------------\
@@ -83,6 +86,8 @@ struct req_progress {
 struct crypto_priv {
 	void __iomem *reg;
 	void __iomem *sram;
+	struct gen_pool *sram_pool;
+	dma_addr_t sram_dma;
 	int irq;
 	struct clk *clk;
 	struct task_struct *queue_th;
@@ -595,7 +600,7 @@ static int queue_manag(void *data)
 	cpg->eng_st = ENGINE_IDLE;
 	do {
 		struct crypto_async_request *async_req = NULL;
-		struct crypto_async_request *backlog;
+		struct crypto_async_request *backlog = NULL;
 
 		__set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1019,6 +1024,39 @@ static struct ahash_alg mv_hmac_sha1_alg = {
 		 }
 };
 
+static int mv_cesa_get_sram(struct platform_device *pdev,
+			    struct crypto_priv *cp)
+{
+	struct resource *res;
+	u32 sram_size = MV_CESA_DEFAULT_SRAM_SIZE;
+
+	of_property_read_u32(pdev->dev.of_node, "marvell,crypto-sram-size",
+			     &sram_size);
+
+	cp->sram_size = sram_size;
+	cp->sram_pool = of_get_named_gen_pool(pdev->dev.of_node,
+					      "marvell,crypto-srams", 0);
+	if (cp->sram_pool) {
+		cp->sram = gen_pool_dma_alloc(cp->sram_pool, sram_size,
+					      &cp->sram_dma);
+		if (cp->sram)
+			return 0;
+
+		return -ENOMEM;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "sram");
+	if (!res || resource_size(res) < cp->sram_size)
+		return -EINVAL;
+
+	cp->sram = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cp->sram))
+		return PTR_ERR(cp->sram);
+
+	return 0;
+}
+
 static int mv_probe(struct platform_device *pdev)
 {
 	struct crypto_priv *cp;
@@ -1041,24 +1079,17 @@ static int mv_probe(struct platform_device *pdev)
 
 	spin_lock_init(&cp->lock);
 	crypto_init_queue(&cp->queue, 50);
-	cp->reg = ioremap(res->start, resource_size(res));
-	if (!cp->reg) {
-		ret = -ENOMEM;
+	cp->reg = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cp->reg)) {
+		ret = PTR_ERR(cp->reg);
 		goto err;
 	}
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram");
-	if (!res) {
-		ret = -ENXIO;
-		goto err_unmap_reg;
-	}
-	cp->sram_size = resource_size(res);
+	ret = mv_cesa_get_sram(pdev, cp);
+	if (ret)
+		goto err;
+
 	cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
-	cp->sram = ioremap(res->start, cp->sram_size);
-	if (!cp->sram) {
-		ret = -ENOMEM;
-		goto err_unmap_reg;
-	}
 
 	if (pdev->dev.of_node)
 		irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
@@ -1066,7 +1097,7 @@ static int mv_probe(struct platform_device *pdev)
 		irq = platform_get_irq(pdev, 0);
 	if (irq < 0 || irq == NO_IRQ) {
 		ret = irq;
-		goto err_unmap_sram;
+		goto err;
 	}
 	cp->irq = irq;
 
@@ -1076,7 +1107,7 @@ static int mv_probe(struct platform_device *pdev)
 	cp->queue_th = kthread_run(queue_manag, cp, "mv_crypto");
 	if (IS_ERR(cp->queue_th)) {
 		ret = PTR_ERR(cp->queue_th);
-		goto err_unmap_sram;
+		goto err;
 	}
 
 	ret = request_irq(irq, crypto_int, 0, dev_name(&pdev->dev),
@@ -1134,10 +1165,6 @@ err_irq:
 	}
 err_thread:
 	kthread_stop(cp->queue_th);
-err_unmap_sram:
-	iounmap(cp->sram);
-err_unmap_reg:
-	iounmap(cp->reg);
 err:
 	kfree(cp);
 	cpg = NULL;
@@ -1157,8 +1184,6 @@ static int mv_remove(struct platform_device *pdev)
 	kthread_stop(cp->queue_th);
 	free_irq(cp->irq, cp);
 	memset(cp->sram, 0, cp->sram_size);
-	iounmap(cp->sram);
-	iounmap(cp->reg);
 
 	if (!IS_ERR(cp->clk)) {
 		clk_disable_unprepare(cp->clk);
@@ -1172,6 +1197,8 @@ static int mv_remove(struct platform_device *pdev)
 
 static const struct of_device_id mv_cesa_of_match_table[] = {
 	{ .compatible = "marvell,orion-crypto", },
+	{ .compatible = "marvell,kirkwood-crypto", },
+	{ .compatible = "marvell,dove-crypto", },
 	{}
 };
 MODULE_DEVICE_TABLE(of, mv_cesa_of_match_table);

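The mv_cesa.c changes above move the legacy driver to a two-step SRAM lookup: first try an SRAM region exported as a gen_pool through the "marvell,crypto-srams" phandle, then fall back to a named MEM resource. A condensed sketch of the same pattern for a hypothetical driver (my_get_sram() and the "sram" property name are placeholders for the example):

#include <linux/err.h>
#include <linux/genalloc.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int my_get_sram(struct platform_device *pdev, size_t size,
		       void __iomem **vaddr, dma_addr_t *dma)
{
	struct gen_pool *pool;
	struct resource *res;

	/* Preferred: an SRAM pool referenced by a DT phandle. */
	pool = of_get_named_gen_pool(pdev->dev.of_node, "sram", 0);
	if (pool) {
		*vaddr = (void __iomem *)gen_pool_dma_alloc(pool, size, dma);
		return *vaddr ? 0 : -ENOMEM;
	}

	/* Fallback: a plain memory resource named "sram". */
	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram");
	if (!res || resource_size(res) < size)
		return -EINVAL;

	*vaddr = devm_ioremap_resource(&pdev->dev, res);
	return PTR_ERR_OR_ZERO(*vaddr);
}
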
+ 4 - 4
drivers/crypto/n2_core.c

@@ -1281,10 +1281,10 @@ static const char md5_zero[MD5_DIGEST_SIZE] = {
 	0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
 };
 static const u32 md5_init[MD5_HASH_WORDS] = {
-	cpu_to_le32(0x67452301),
-	cpu_to_le32(0xefcdab89),
-	cpu_to_le32(0x98badcfe),
-	cpu_to_le32(0x10325476),
+	cpu_to_le32(MD5_H0),
+	cpu_to_le32(MD5_H1),
+	cpu_to_le32(MD5_H2),
+	cpu_to_le32(MD5_H3),
 };
 static const char sha1_zero[SHA1_DIGEST_SIZE] = {
 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,

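The n2_core.c hunk is purely cosmetic: the magic numbers are replaced with the MD5_H0..MD5_H3 constants from <crypto/md5.h>, which carry exactly the values being removed. A compile-time assertion one could use to convince oneself of that (a sketch, not part of the patch):

#include <crypto/md5.h>
#include <linux/bug.h>

static inline void md5_iv_sanity_check(void)
{
	/* Values taken from the literals removed above. */
	BUILD_BUG_ON(MD5_H0 != 0x67452301);
	BUILD_BUG_ON(MD5_H1 != 0xefcdab89);
	BUILD_BUG_ON(MD5_H2 != 0x98badcfe);
	BUILD_BUG_ON(MD5_H3 != 0x10325476);
}
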
+ 45 - 16
drivers/crypto/nx/Kconfig

@@ -1,26 +1,55 @@
+
 config CRYPTO_DEV_NX_ENCRYPT
-	tristate "Encryption acceleration support"
-	depends on PPC64 && IBMVIO
+	tristate "Encryption acceleration support on pSeries platform"
+	depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
 	default y
 	select CRYPTO_AES
-	select CRYPTO_CBC
-	select CRYPTO_ECB
 	select CRYPTO_CCM
-	select CRYPTO_GCM
-	select CRYPTO_AUTHENC
-	select CRYPTO_XCBC
-	select CRYPTO_SHA256
-	select CRYPTO_SHA512
 	help
-	  Support for Power7+ in-Nest encryption acceleration. This
-	  module supports acceleration for AES and SHA2 algorithms. If you
-	  choose 'M' here, this module will be called nx_crypto.
+	  Support for PowerPC Nest (NX) encryption acceleration. This
+	  module supports acceleration for AES and SHA2 algorithms on
+	  the pSeries platform.  If you choose 'M' here, this module
+	  will be called nx_crypto.
 
 config CRYPTO_DEV_NX_COMPRESS
 	tristate "Compression acceleration support"
-	depends on PPC64 && IBMVIO
 	default y
 	help
-	  Support for Power7+ in-Nest compression acceleration. This
-	  module supports acceleration for AES and SHA2 algorithms. If you
-	  choose 'M' here, this module will be called nx_compress.
+	  Support for PowerPC Nest (NX) compression acceleration. This
+	  module supports acceleration for compressing memory with the 842
+	  algorithm.  One of the platform drivers must also be selected.
+	  If you choose 'M' here, this module will be called nx_compress.
+
+if CRYPTO_DEV_NX_COMPRESS
+
+config CRYPTO_DEV_NX_COMPRESS_PSERIES
+	tristate "Compression acceleration support on pSeries platform"
+	depends on PPC_PSERIES && IBMVIO
+	default y
+	help
+	  Support for PowerPC Nest (NX) compression acceleration. This
+	  module supports acceleration for compressing memory with the 842
+	  algorithm.  This supports NX hardware on the pSeries platform.
+	  If you choose 'M' here, this module will be called nx_compress_pseries.
+
+config CRYPTO_DEV_NX_COMPRESS_POWERNV
+	tristate "Compression acceleration support on PowerNV platform"
+	depends on PPC_POWERNV
+	default y
+	help
+	  Support for PowerPC Nest (NX) compression acceleration. This
+	  module supports acceleration for compressing memory with the 842
+	  algorithm.  This supports NX hardware on the PowerNV platform.
+	  If you choose 'M' here, this module will be called nx_compress_powernv.
+
+config CRYPTO_DEV_NX_COMPRESS_CRYPTO
+	tristate "Compression acceleration cryptographic interface"
+	select CRYPTO_ALGAPI
+	select 842_DECOMPRESS
+	default y
+	help
+	  Support for PowerPC Nest (NX) accelerators using the cryptographic
+	  API.  If you choose 'M' here, this module will be called
+	  nx_compress_crypto.
+
+endif

+ 8 - 1
drivers/crypto/nx/Makefile

@@ -10,5 +10,12 @@ nx-crypto-objs := nx.o \
 		  nx-sha256.o \
 		  nx-sha512.o
 
-obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o nx-compress-platform.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_CRYPTO) += nx-compress-crypto.o
 nx-compress-objs := nx-842.o
+nx-compress-platform-objs := nx-842-platform.o
+nx-compress-pseries-objs := nx-842-pseries.o
+nx-compress-powernv-objs := nx-842-powernv.o
+nx-compress-crypto-objs := nx-842-crypto.o

+ 580 - 0
drivers/crypto/nx/nx-842-crypto.c

@@ -0,0 +1,580 @@
+/*
+ * Cryptographic API for the NX-842 hardware compression.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) IBM Corporation, 2011-2015
+ *
+ * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
+ *
+ * Rewrite: Dan Streetman <ddstreet@ieee.org>
+ *
+ * This is an interface to the NX-842 compression hardware in PowerPC
+ * processors.  Most of the complexity of this drvier is due to the fact that
+ * the NX-842 compression hardware requires the input and output data buffers
+ * to be specifically aligned, to be a specific multiple in length, and within
+ * specific minimum and maximum lengths.  Those restrictions, provided by the
+ * nx-842 driver via nx842_constraints, mean this driver must use bounce
+ * buffers and headers to correct misaligned in or out buffers, and to split
+ * input buffers that are too large.
+ *
+ * This driver will fall back to software decompression if the hardware
+ * decompression fails, so this driver's decompression should never fail as
+ * long as the provided compressed buffer is valid.  Any compressed buffer
+ * created by this driver will have a header (except ones where the input
+ * perfectly matches the constraints); so users of this driver cannot simply
+ * pass a compressed buffer created by this driver over to the 842 software
+ * decompression library.  Instead, users must use this driver to decompress;
+ * if the hardware fails or is unavailable, the compressed buffer will be
+ * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
+ * software decompression library.
+ *
+ * This does not fall back to software compression, however, since the caller
+ * of this function is specifically requesting hardware compression; if the
+ * hardware compression fails, the caller can fall back to software
+ * compression, and the raw 842 compressed buffer that the software compressor
+ * creates can be passed to this driver for hardware decompression; any
+ * buffer without our specific header magic is assumed to be a raw 842 buffer
+ * and passed directly to the hardware.  Note that the software compression
+ * library will produce a compressed buffer that is incompatible with the
+ * hardware decompressor if the original input buffer length is not a multiple
+ * of 8; if such a compressed buffer is passed to this driver for
+ * decompression, the hardware will reject it and this driver will then pass
+ * it over to the software library for decompression.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/sw842.h>
+#include <linux/ratelimit.h>
+
+#include "nx-842.h"
+
+/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
+ * template (see lib/842/842.h), so this magic number will never appear at
+ * the start of a raw 842 compressed buffer.  That is important, as any buffer
+ * passed to us without this magic is assumed to be a raw 842 compressed
+ * buffer, and passed directly to the hardware to decompress.
+ */
+#define NX842_CRYPTO_MAGIC	(0xf842)
+#define NX842_CRYPTO_GROUP_MAX	(0x20)
+#define NX842_CRYPTO_HEADER_SIZE(g)				\
+	(sizeof(struct nx842_crypto_header) +			\
+	 sizeof(struct nx842_crypto_header_group) * (g))
+#define NX842_CRYPTO_HEADER_MAX_SIZE				\
+	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
+
+/* bounce buffer size */
+#define BOUNCE_BUFFER_ORDER	(2)
+#define BOUNCE_BUFFER_SIZE					\
+	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
+
+/* try longer on comp because we can fall back to sw decomp if hw is busy */
+#define COMP_BUSY_TIMEOUT	(250) /* ms */
+#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
+
+struct nx842_crypto_header_group {
+	__be16 padding;			/* unused bytes at start of group */
+	__be32 compressed_length;	/* compressed bytes in group */
+	__be32 uncompressed_length;	/* bytes after decompression */
+} __packed;
+
+struct nx842_crypto_header {
+	__be16 magic;		/* NX842_CRYPTO_MAGIC */
+	__be16 ignore;		/* decompressed end bytes to ignore */
+	u8 groups;		/* total groups in this header */
+	struct nx842_crypto_header_group group[];
+} __packed;
+
+struct nx842_crypto_param {
+	u8 *in;
+	unsigned int iremain;
+	u8 *out;
+	unsigned int oremain;
+	unsigned int ototal;
+};
+
+static int update_param(struct nx842_crypto_param *p,
+			unsigned int slen, unsigned int dlen)
+{
+	if (p->iremain < slen)
+		return -EOVERFLOW;
+	if (p->oremain < dlen)
+		return -ENOSPC;
+
+	p->in += slen;
+	p->iremain -= slen;
+	p->out += dlen;
+	p->oremain -= dlen;
+	p->ototal += dlen;
+
+	return 0;
+}
+
+struct nx842_crypto_ctx {
+	u8 *wmem;
+	u8 *sbounce, *dbounce;
+
+	struct nx842_crypto_header header;
+	struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX];
+};
+
+static int nx842_crypto_init(struct crypto_tfm *tfm)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->wmem = kmalloc(nx842_workmem_size(), GFP_KERNEL);
+	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
+	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
+	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
+		kfree(ctx->wmem);
+		free_page((unsigned long)ctx->sbounce);
+		free_page((unsigned long)ctx->dbounce);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void nx842_crypto_exit(struct crypto_tfm *tfm)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	kfree(ctx->wmem);
+	free_page((unsigned long)ctx->sbounce);
+	free_page((unsigned long)ctx->dbounce);
+}
+
+static int read_constraints(struct nx842_constraints *c)
+{
+	int ret;
+
+	ret = nx842_constraints(c);
+	if (ret) {
+		pr_err_ratelimited("could not get nx842 constraints : %d\n",
+				   ret);
+		return ret;
+	}
+
+	/* limit maximum, to always have enough bounce buffer to decompress */
+	if (c->maximum > BOUNCE_BUFFER_SIZE) {
+		c->maximum = BOUNCE_BUFFER_SIZE;
+		pr_info_once("limiting nx842 maximum to %x\n", c->maximum);
+	}
+
+	return 0;
+}
+
+static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
+{
+	int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
+
+	/* compress should have added space for header */
+	if (s > be16_to_cpu(hdr->group[0].padding)) {
+		pr_err("Internal error: no space for header\n");
+		return -EINVAL;
+	}
+
+	memcpy(buf, hdr, s);
+
+	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
+
+	return 0;
+}
+
+static int compress(struct nx842_crypto_ctx *ctx,
+		    struct nx842_crypto_param *p,
+		    struct nx842_crypto_header_group *g,
+		    struct nx842_constraints *c,
+		    u16 *ignore,
+		    unsigned int hdrsize)
+{
+	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
+	unsigned int adj_slen = slen;
+	u8 *src = p->in, *dst = p->out;
+	int ret, dskip = 0;
+	ktime_t timeout;
+
+	if (p->iremain == 0)
+		return -EOVERFLOW;
+
+	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
+		return -ENOSPC;
+
+	if (slen % c->multiple)
+		adj_slen = round_up(slen, c->multiple);
+	if (slen < c->minimum)
+		adj_slen = c->minimum;
+	if (slen > c->maximum)
+		adj_slen = slen = c->maximum;
+	if (adj_slen > slen || (u64)src % c->alignment) {
+		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
+		slen = min(slen, BOUNCE_BUFFER_SIZE);
+		if (adj_slen > slen)
+			memset(ctx->sbounce + slen, 0, adj_slen - slen);
+		memcpy(ctx->sbounce, src, slen);
+		src = ctx->sbounce;
+		slen = adj_slen;
+		pr_debug("using comp sbounce buffer, len %x\n", slen);
+	}
+
+	dst += hdrsize;
+	dlen -= hdrsize;
+
+	if ((u64)dst % c->alignment) {
+		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
+		dst += dskip;
+		dlen -= dskip;
+	}
+	if (dlen % c->multiple)
+		dlen = round_down(dlen, c->multiple);
+	if (dlen < c->minimum) {
+nospc:
+		dst = ctx->dbounce;
+		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
+		dlen = round_down(dlen, c->multiple);
+		dskip = 0;
+		pr_debug("using comp dbounce buffer, len %x\n", dlen);
+	}
+	if (dlen > c->maximum)
+		dlen = c->maximum;
+
+	tmplen = dlen;
+	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
+	do {
+		dlen = tmplen; /* reset dlen, if we're retrying */
+		ret = nx842_compress(src, slen, dst, &dlen, ctx->wmem);
+		/* possibly we should reduce the slen here, instead of
+		 * retrying with the dbounce buffer?
+		 */
+		if (ret == -ENOSPC && dst != ctx->dbounce)
+			goto nospc;
+	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
+	if (ret)
+		return ret;
+
+	dskip += hdrsize;
+
+	if (dst == ctx->dbounce)
+		memcpy(p->out + dskip, dst, dlen);
+
+	g->padding = cpu_to_be16(dskip);
+	g->compressed_length = cpu_to_be32(dlen);
+	g->uncompressed_length = cpu_to_be32(slen);
+
+	if (p->iremain < slen) {
+		*ignore = slen - p->iremain;
+		slen = p->iremain;
+	}
+
+	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
+		 slen, *ignore, dlen, dskip);
+
+	return update_param(p, slen, dskip + dlen);
+}
+
+static int nx842_crypto_compress(struct crypto_tfm *tfm,
+				 const u8 *src, unsigned int slen,
+				 u8 *dst, unsigned int *dlen)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct nx842_crypto_header *hdr = &ctx->header;
+	struct nx842_crypto_param p;
+	struct nx842_constraints c;
+	unsigned int groups, hdrsize, h;
+	int ret, n;
+	bool add_header;
+	u16 ignore = 0;
+
+	p.in = (u8 *)src;
+	p.iremain = slen;
+	p.out = dst;
+	p.oremain = *dlen;
+	p.ototal = 0;
+
+	*dlen = 0;
+
+	ret = read_constraints(&c);
+	if (ret)
+		return ret;
+
+	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
+		       DIV_ROUND_UP(p.iremain, c.maximum));
+	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
+
+	/* skip adding header if the buffers meet all constraints */
+	add_header = (p.iremain % c.multiple	||
+		      p.iremain < c.minimum	||
+		      p.iremain > c.maximum	||
+		      (u64)p.in % c.alignment	||
+		      p.oremain % c.multiple	||
+		      p.oremain < c.minimum	||
+		      p.oremain > c.maximum	||
+		      (u64)p.out % c.alignment);
+
+	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
+	hdr->groups = 0;
+	hdr->ignore = 0;
+
+	while (p.iremain > 0) {
+		n = hdr->groups++;
+		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
+			return -ENOSPC;
+
+		/* header goes before first group */
+		h = !n && add_header ? hdrsize : 0;
+
+		if (ignore)
+			pr_warn("internal error, ignore is set %x\n", ignore);
+
+		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
+		if (ret)
+			return ret;
+	}
+
+	if (!add_header && hdr->groups > 1) {
+		pr_err("Internal error: No header but multiple groups\n");
+		return -EINVAL;
+	}
+
+	/* ignore indicates the input stream needed to be padded */
+	hdr->ignore = cpu_to_be16(ignore);
+	if (ignore)
+		pr_debug("marked %d bytes as ignore\n", ignore);
+
+	if (add_header)
+		ret = nx842_crypto_add_header(hdr, dst);
+	if (ret)
+		return ret;
+
+	*dlen = p.ototal;
+
+	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
+
+	return 0;
+}
+
+static int decompress(struct nx842_crypto_ctx *ctx,
+		      struct nx842_crypto_param *p,
+		      struct nx842_crypto_header_group *g,
+		      struct nx842_constraints *c,
+		      u16 ignore,
+		      bool usehw)
+{
+	unsigned int slen = be32_to_cpu(g->compressed_length);
+	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
+	unsigned int dlen = p->oremain, tmplen;
+	unsigned int adj_slen = slen;
+	u8 *src = p->in, *dst = p->out;
+	u16 padding = be16_to_cpu(g->padding);
+	int ret, spadding = 0, dpadding = 0;
+	ktime_t timeout;
+
+	if (!slen || !required_len)
+		return -EINVAL;
+
+	if (p->iremain <= 0 || padding + slen > p->iremain)
+		return -EOVERFLOW;
+
+	if (p->oremain <= 0 || required_len - ignore > p->oremain)
+		return -ENOSPC;
+
+	src += padding;
+
+	if (!usehw)
+		goto usesw;
+
+	if (slen % c->multiple)
+		adj_slen = round_up(slen, c->multiple);
+	if (slen < c->minimum)
+		adj_slen = c->minimum;
+	if (slen > c->maximum)
+		goto usesw;
+	if (slen < adj_slen || (u64)src % c->alignment) {
+		/* we can append padding bytes because the 842 format defines
+		 * an "end" template (see lib/842/842_decompress.c) and will
+		 * ignore any bytes following it.
+		 */
+		if (slen < adj_slen)
+			memset(ctx->sbounce + slen, 0, adj_slen - slen);
+		memcpy(ctx->sbounce, src, slen);
+		src = ctx->sbounce;
+		spadding = adj_slen - slen;
+		slen = adj_slen;
+		pr_debug("using decomp sbounce buffer, len %x\n", slen);
+	}
+
+	if (dlen % c->multiple)
+		dlen = round_down(dlen, c->multiple);
+	if (dlen < required_len || (u64)dst % c->alignment) {
+		dst = ctx->dbounce;
+		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
+		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
+	}
+	if (dlen < c->minimum)
+		goto usesw;
+	if (dlen > c->maximum)
+		dlen = c->maximum;
+
+	tmplen = dlen;
+	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
+	do {
+		dlen = tmplen; /* reset dlen, if we're retrying */
+		ret = nx842_decompress(src, slen, dst, &dlen, ctx->wmem);
+	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
+	if (ret) {
+usesw:
+		/* reset everything, sw doesn't have constraints */
+		src = p->in + padding;
+		slen = be32_to_cpu(g->compressed_length);
+		spadding = 0;
+		dst = p->out;
+		dlen = p->oremain;
+		dpadding = 0;
+		if (dlen < required_len) { /* have ignore bytes */
+			dst = ctx->dbounce;
+			dlen = BOUNCE_BUFFER_SIZE;
+		}
+		pr_info_ratelimited("using software 842 decompression\n");
+		ret = sw842_decompress(src, slen, dst, &dlen);
+	}
+	if (ret)
+		return ret;
+
+	slen -= spadding;
+
+	dlen -= ignore;
+	if (ignore)
+		pr_debug("ignoring last %x bytes\n", ignore);
+
+	if (dst == ctx->dbounce)
+		memcpy(p->out, dst, dlen);
+
+	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
+		 slen, padding, dlen, ignore);
+
+	return update_param(p, slen + padding, dlen);
+}
+
+static int nx842_crypto_decompress(struct crypto_tfm *tfm,
+				   const u8 *src, unsigned int slen,
+				   u8 *dst, unsigned int *dlen)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct nx842_crypto_header *hdr;
+	struct nx842_crypto_param p;
+	struct nx842_constraints c;
+	int n, ret, hdr_len;
+	u16 ignore = 0;
+	bool usehw = true;
+
+	p.in = (u8 *)src;
+	p.iremain = slen;
+	p.out = dst;
+	p.oremain = *dlen;
+	p.ototal = 0;
+
+	*dlen = 0;
+
+	if (read_constraints(&c))
+		usehw = false;
+
+	hdr = (struct nx842_crypto_header *)src;
+
+	/* If it doesn't start with our header magic number, assume it's a raw
+	 * 842 compressed buffer and pass it directly to the hardware driver
+	 */
+	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
+		struct nx842_crypto_header_group g = {
+			.padding =		0,
+			.compressed_length =	cpu_to_be32(p.iremain),
+			.uncompressed_length =	cpu_to_be32(p.oremain),
+		};
+
+		ret = decompress(ctx, &p, &g, &c, 0, usehw);
+		if (ret)
+			return ret;
+
+		*dlen = p.ototal;
+
+		return 0;
+	}
+
+	if (!hdr->groups) {
+		pr_err("header has no groups\n");
+		return -EINVAL;
+	}
+	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
+		pr_err("header has too many groups %x, max %x\n",
+		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
+		return -EINVAL;
+	}
+
+	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
+	if (hdr_len > slen)
+		return -EOVERFLOW;
+
+	memcpy(&ctx->header, src, hdr_len);
+	hdr = &ctx->header;
+
+	for (n = 0; n < hdr->groups; n++) {
+		/* ignore applies to last group */
+		if (n + 1 == hdr->groups)
+			ignore = be16_to_cpu(hdr->ignore);
+
+		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore, usehw);
+		if (ret)
+			return ret;
+	}
+
+	*dlen = p.ototal;
+
+	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
+
+	return 0;
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "842",
+	.cra_driver_name	= "842-nx",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_init		= nx842_crypto_init,
+	.cra_exit		= nx842_crypto_exit,
+	.cra_u			= { .compress = {
+	.coa_compress		= nx842_crypto_compress,
+	.coa_decompress		= nx842_crypto_decompress } }
+};
+
+static int __init nx842_crypto_mod_init(void)
+{
+	return crypto_register_alg(&alg);
+}
+module_init(nx842_crypto_mod_init);
+
+static void __exit nx842_crypto_mod_exit(void)
+{
+	crypto_unregister_alg(&alg);
+}
+module_exit(nx842_crypto_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Interface");
+MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-nx");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");

+ 84 - 0
drivers/crypto/nx/nx-842-platform.c

@@ -0,0 +1,84 @@
+
+#include "nx-842.h"
+
+/* This pointer is needed here, separate from the main nx-842.c driver,
+ * because that main driver loads the platform drivers during its init(),
+ * and it expects one (or none) of the platform drivers to set this pointer
+ * to its driver.  The pointer therefore can't live in the main nx-842
+ * driver: it wouldn't be accessible until the main driver finished
+ * loading, and the main driver can't finish loading while it is still
+ * waiting for a platform driver.  So place it here.
+ */
+static struct nx842_driver *driver;
+static DEFINE_SPINLOCK(driver_lock);
+
+struct nx842_driver *nx842_platform_driver(void)
+{
+	return driver;
+}
+EXPORT_SYMBOL_GPL(nx842_platform_driver);
+
+bool nx842_platform_driver_set(struct nx842_driver *_driver)
+{
+	bool ret = false;
+
+	spin_lock(&driver_lock);
+
+	if (!driver) {
+		driver = _driver;
+		ret = true;
+	} else
+		WARN(1, "can't set platform driver, already set to %s\n",
+		     driver->name);
+
+	spin_unlock(&driver_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_platform_driver_set);
+
+/* only call this from the platform driver exit function */
+void nx842_platform_driver_unset(struct nx842_driver *_driver)
+{
+	spin_lock(&driver_lock);
+
+	if (driver == _driver)
+		driver = NULL;
+	else if (driver)
+		WARN(1, "can't unset platform driver %s, currently set to %s\n",
+		     _driver->name, driver->name);
+	else
+		WARN(1, "can't unset platform driver, already unset\n");
+
+	spin_unlock(&driver_lock);
+}
+EXPORT_SYMBOL_GPL(nx842_platform_driver_unset);
+
+bool nx842_platform_driver_get(void)
+{
+	bool ret = false;
+
+	spin_lock(&driver_lock);
+
+	if (driver)
+		ret = try_module_get(driver->owner);
+
+	spin_unlock(&driver_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_platform_driver_get);
+
+void nx842_platform_driver_put(void)
+{
+	spin_lock(&driver_lock);
+
+	if (driver)
+		module_put(driver->owner);
+
+	spin_unlock(&driver_lock);
+}
+EXPORT_SYMBOL_GPL(nx842_platform_driver_put);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("842 H/W Compression platform driver");

+ 637 - 0
drivers/crypto/nx/nx-842-powernv.c

@@ -0,0 +1,637 @@
+/*
+ * Driver for IBM PowerNV 842 compression accelerator
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "nx-842.h"
+
+#include <linux/timer.h>
+
+#include <asm/prom.h>
+#include <asm/icswx.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
+
+#define WORKMEM_ALIGN	(CRB_ALIGN)
+#define CSB_WAIT_MAX	(5000) /* ms */
+
+struct nx842_workmem {
+	/* Below fields must be properly aligned */
+	struct coprocessor_request_block crb; /* CRB_ALIGN align */
+	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
+	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
+	/* Above fields must be properly aligned */
+
+	ktime_t start;
+
+	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
+} __packed __aligned(WORKMEM_ALIGN);
+
+struct nx842_coproc {
+	unsigned int chip_id;
+	unsigned int ct;
+	unsigned int ci;
+	struct list_head list;
+};
+
+/* no cpu hotplug on powernv, so this list never changes after init */
+static LIST_HEAD(nx842_coprocs);
+static unsigned int nx842_ct;
+
+/**
+ * setup_indirect_dde - Setup an indirect DDE
+ *
+ * The DDE is set up with the DDE count, byte count, and address of the
+ * first direct DDE in the list.
+ */
+static void setup_indirect_dde(struct data_descriptor_entry *dde,
+			       struct data_descriptor_entry *ddl,
+			       unsigned int dde_count, unsigned int byte_count)
+{
+	dde->flags = 0;
+	dde->count = dde_count;
+	dde->index = 0;
+	dde->length = cpu_to_be32(byte_count);
+	dde->address = cpu_to_be64(nx842_get_pa(ddl));
+}
+
+/**
+ * setup_direct_dde - Setup single DDE from buffer
+ *
+ * The DDE is set up with the buffer and length.  The buffer must be properly
+ * aligned.  The used length is returned.
+ * Returns:
+ *   N    Successfully set up DDE with N bytes
+ */
+static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
+				     unsigned long pa, unsigned int len)
+{
+	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
+
+	dde->flags = 0;
+	dde->count = 0;
+	dde->index = 0;
+	dde->length = cpu_to_be32(l);
+	dde->address = cpu_to_be64(pa);
+
+	return l;
+}
+
+/**
+ * setup_ddl - Setup DDL from buffer
+ *
+ * Returns:
+ *   0		Successfully set up DDL
+ */
+static int setup_ddl(struct data_descriptor_entry *dde,
+		     struct data_descriptor_entry *ddl,
+		     unsigned char *buf, unsigned int len,
+		     bool in)
+{
+	unsigned long pa = nx842_get_pa(buf);
+	int i, ret, total_len = len;
+
+	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
+		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
+			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
+		return -EINVAL;
+	}
+
+	/* only need to check last mult; since buffer must be
+	 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
+	 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
+	 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
+	 */
+	if (len % DDE_BUFFER_LAST_MULT) {
+		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
+			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
+		if (in)
+			return -EINVAL;
+		len = round_down(len, DDE_BUFFER_LAST_MULT);
+	}
+
+	/* use a single direct DDE */
+	if (len <= LEN_ON_PAGE(pa)) {
+		ret = setup_direct_dde(dde, pa, len);
+		WARN_ON(ret < len);
+		return 0;
+	}
+
+	/* use the DDL */
+	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
+		ret = setup_direct_dde(&ddl[i], pa, len);
+		buf += ret;
+		len -= ret;
+		pa = nx842_get_pa(buf);
+	}
+
+	if (len > 0) {
+		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
+			 total_len, in ? "input" : "output", len);
+		if (in)
+			return -EMSGSIZE;
+		total_len -= len;
+	}
+	setup_indirect_dde(dde, ddl, i, total_len);
+
+	return 0;
+}
+
+#define CSB_ERR(csb, msg, ...)					\
+	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
+	       ##__VA_ARGS__, (csb)->flags,			\
+	       (csb)->cs, (csb)->cc, (csb)->ce,			\
+	       be32_to_cpu((csb)->count))
+
+#define CSB_ERR_ADDR(csb, msg, ...)				\
+	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
+		(unsigned long)be64_to_cpu((csb)->address))
+
+/**
+ * wait_for_csb - poll for the CSB valid bit, then decode the completion code
+ */
+static int wait_for_csb(struct nx842_workmem *wmem,
+			struct coprocessor_status_block *csb)
+{
+	ktime_t start = wmem->start, now = ktime_get();
+	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
+
+	while (!(ACCESS_ONCE(csb->flags) & CSB_V)) {
+		cpu_relax();
+		now = ktime_get();
+		if (ktime_after(now, timeout))
+			break;
+	}
+
+	/* hw has updated csb and output buffer */
+	barrier();
+
+	/* check CSB flags */
+	if (!(csb->flags & CSB_V)) {
+		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
+			(long)ktime_us_delta(now, start));
+		return -ETIMEDOUT;
+	}
+	if (csb->flags & CSB_F) {
+		CSB_ERR(csb, "Invalid CSB format");
+		return -EPROTO;
+	}
+	if (csb->flags & CSB_CH) {
+		CSB_ERR(csb, "Invalid CSB chaining state");
+		return -EPROTO;
+	}
+
+	/* verify CSB completion sequence is 0 */
+	if (csb->cs) {
+		CSB_ERR(csb, "Invalid CSB completion sequence");
+		return -EPROTO;
+	}
+
+	/* check CSB Completion Code */
+	switch (csb->cc) {
+	/* no error */
+	case CSB_CC_SUCCESS:
+		break;
+	case CSB_CC_TPBC_GT_SPBC:
+		/* not an error, but the compressed data is
+		 * larger than the uncompressed data :(
+		 */
+		break;
+
+	/* input data errors */
+	case CSB_CC_OPERAND_OVERLAP:
+		/* input and output buffers overlap */
+		CSB_ERR(csb, "Operand Overlap error");
+		return -EINVAL;
+	case CSB_CC_INVALID_OPERAND:
+		CSB_ERR(csb, "Invalid operand");
+		return -EINVAL;
+	case CSB_CC_NOSPC:
+		/* output buffer too small */
+		return -ENOSPC;
+	case CSB_CC_ABORT:
+		CSB_ERR(csb, "Function aborted");
+		return -EINTR;
+	case CSB_CC_CRC_MISMATCH:
+		CSB_ERR(csb, "CRC mismatch");
+		return -EINVAL;
+	case CSB_CC_TEMPL_INVALID:
+		CSB_ERR(csb, "Compressed data template invalid");
+		return -EINVAL;
+	case CSB_CC_TEMPL_OVERFLOW:
+		CSB_ERR(csb, "Compressed data template shows data past end");
+		return -EINVAL;
+
+	/* these should not happen */
+	case CSB_CC_INVALID_ALIGN:
+		/* setup_ddl should have detected this */
+		CSB_ERR_ADDR(csb, "Invalid alignment");
+		return -EINVAL;
+	case CSB_CC_DATA_LENGTH:
+		/* setup_ddl should have detected this */
+		CSB_ERR(csb, "Invalid data length");
+		return -EINVAL;
+	case CSB_CC_WR_TRANSLATION:
+	case CSB_CC_TRANSLATION:
+	case CSB_CC_TRANSLATION_DUP1:
+	case CSB_CC_TRANSLATION_DUP2:
+	case CSB_CC_TRANSLATION_DUP3:
+	case CSB_CC_TRANSLATION_DUP4:
+	case CSB_CC_TRANSLATION_DUP5:
+	case CSB_CC_TRANSLATION_DUP6:
+		/* should not happen, we use physical addrs */
+		CSB_ERR_ADDR(csb, "Translation error");
+		return -EPROTO;
+	case CSB_CC_WR_PROTECTION:
+	case CSB_CC_PROTECTION:
+	case CSB_CC_PROTECTION_DUP1:
+	case CSB_CC_PROTECTION_DUP2:
+	case CSB_CC_PROTECTION_DUP3:
+	case CSB_CC_PROTECTION_DUP4:
+	case CSB_CC_PROTECTION_DUP5:
+	case CSB_CC_PROTECTION_DUP6:
+		/* should not happen, we use physical addrs */
+		CSB_ERR_ADDR(csb, "Protection error");
+		return -EPROTO;
+	case CSB_CC_PRIVILEGE:
+		/* shouldn't happen, we're in HYP mode */
+		CSB_ERR(csb, "Insufficient Privilege error");
+		return -EPROTO;
+	case CSB_CC_EXCESSIVE_DDE:
+		/* shouldn't happen, setup_ddl doesn't use many dde's */
+		CSB_ERR(csb, "Too many DDEs in DDL");
+		return -EINVAL;
+	case CSB_CC_TRANSPORT:
+		/* shouldn't happen, we setup CRB correctly */
+		CSB_ERR(csb, "Invalid CRB");
+		return -EINVAL;
+	case CSB_CC_SEGMENTED_DDL:
+		/* shouldn't happen, setup_ddl creates DDL right */
+		CSB_ERR(csb, "Segmented DDL error");
+		return -EINVAL;
+	case CSB_CC_DDE_OVERFLOW:
+		/* shouldn't happen, setup_ddl creates DDL right */
+		CSB_ERR(csb, "DDE overflow error");
+		return -EINVAL;
+	case CSB_CC_SESSION:
+		/* should not happen with ICSWX */
+		CSB_ERR(csb, "Session violation error");
+		return -EPROTO;
+	case CSB_CC_CHAIN:
+		/* should not happen, we don't use chained CRBs */
+		CSB_ERR(csb, "Chained CRB error");
+		return -EPROTO;
+	case CSB_CC_SEQUENCE:
+		/* should not happen, we don't use chained CRBs */
+		CSB_ERR(csb, "CRB sequence number error");
+		return -EPROTO;
+	case CSB_CC_UNKNOWN_CODE:
+		CSB_ERR(csb, "Unknown subfunction code");
+		return -EPROTO;
+
+	/* hardware errors */
+	case CSB_CC_RD_EXTERNAL:
+	case CSB_CC_RD_EXTERNAL_DUP1:
+	case CSB_CC_RD_EXTERNAL_DUP2:
+	case CSB_CC_RD_EXTERNAL_DUP3:
+		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
+		return -EPROTO;
+	case CSB_CC_WR_EXTERNAL:
+		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
+		return -EPROTO;
+	case CSB_CC_INTERNAL:
+		CSB_ERR(csb, "Internal error in coprocessor");
+		return -EPROTO;
+	case CSB_CC_PROVISION:
+		CSB_ERR(csb, "Storage provision error");
+		return -EPROTO;
+	case CSB_CC_HW:
+		CSB_ERR(csb, "Correctable hardware error");
+		return -EPROTO;
+
+	default:
+		CSB_ERR(csb, "Invalid CC %d", csb->cc);
+		return -EPROTO;
+	}
+
+	/* check Completion Extension state */
+	if (csb->ce & CSB_CE_TERMINATION) {
+		CSB_ERR(csb, "CSB request was terminated");
+		return -EPROTO;
+	}
+	if (csb->ce & CSB_CE_INCOMPLETE) {
+		CSB_ERR(csb, "CSB request not complete");
+		return -EPROTO;
+	}
+	if (!(csb->ce & CSB_CE_TPBC)) {
+		CSB_ERR(csb, "TPBC not provided, unknown target length");
+		return -EPROTO;
+	}
+
+	/* successful completion */
+	pr_debug_ratelimited("Processed %u bytes in %lu us\n",
+			     be32_to_cpu(csb->count),
+			     (unsigned long)ktime_us_delta(now, start));
+
+	return 0;
+}
+
+/**
+ * nx842_powernv_function - compress/decompress data using the 842 algorithm
+ *
+ * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
+ * This compresses or decompresses the provided input buffer into the provided
+ * output buffer.
+ *
+ * Upon return from this function *@outlenp contains the length of the
+ * output data.  If there is an error then *@outlenp will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * The @workmem buffer should only be used by one function call at a time.
+ *
+ * @in: input buffer pointer
+ * @inlen: input buffer size
+ * @out: output buffer pointer
+ * @outlenp: output buffer size pointer
+ * @workmem: working memory buffer pointer, size determined by
+ *           nx842_powernv_driver.workmem_size
+ * @fc: function code, see CCW Function Codes in nx-842.h
+ *
+ * Returns:
+ *   0		Success, output of length @outlenp stored in the buffer at @out
+ *   -ENODEV	Hardware unavailable
+ *   -ENOSPC	Output buffer is too small
+ *   -EMSGSIZE	Input buffer too large
+ *   -EINVAL	buffer constraints do not fit nx842_constraints
+ *   -EPROTO	hardware error during operation
+ *   -ETIMEDOUT	hardware did not complete operation in reasonable time
+ *   -EINTR	operation was aborted
+ */
+static int nx842_powernv_function(const unsigned char *in, unsigned int inlen,
+				  unsigned char *out, unsigned int *outlenp,
+				  void *workmem, int fc)
+{
+	struct coprocessor_request_block *crb;
+	struct coprocessor_status_block *csb;
+	struct nx842_workmem *wmem;
+	int ret;
+	u64 csb_addr;
+	u32 ccw;
+	unsigned int outlen = *outlenp;
+
+	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
+
+	*outlenp = 0;
+
+	/* shouldn't happen, we don't load without a coproc */
+	if (!nx842_ct) {
+		pr_err_ratelimited("coprocessor CT is 0");
+		return -ENODEV;
+	}
+
+	crb = &wmem->crb;
+	csb = &crb->csb;
+
+	/* Clear any previous values */
+	memset(crb, 0, sizeof(*crb));
+
+	/* set up DDLs */
+	ret = setup_ddl(&crb->source, wmem->ddl_in,
+			(unsigned char *)in, inlen, true);
+	if (ret)
+		return ret;
+	ret = setup_ddl(&crb->target, wmem->ddl_out,
+			out, outlen, false);
+	if (ret)
+		return ret;
+
+	/* set up CCW */
+	ccw = 0;
+	ccw = SET_FIELD(ccw, CCW_CT, nx842_ct);
+	ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */
+	ccw = SET_FIELD(ccw, CCW_FC_842, fc);
+
+	/* set up CRB's CSB addr */
+	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
+	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
+	crb->csb_addr = cpu_to_be64(csb_addr);
+
+	wmem->start = ktime_get();
+
+	/* do ICSWX */
+	ret = icswx(cpu_to_be32(ccw), crb);
+
+	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
+			     (unsigned int)ccw,
+			     (unsigned int)be32_to_cpu(crb->ccw));
+
+	switch (ret) {
+	case ICSWX_INITIATED:
+		ret = wait_for_csb(wmem, csb);
+		break;
+	case ICSWX_BUSY:
+		pr_debug_ratelimited("842 Coprocessor busy\n");
+		ret = -EBUSY;
+		break;
+	case ICSWX_REJECTED:
+		pr_err_ratelimited("ICSWX rejected\n");
+		ret = -EPROTO;
+		break;
+	default:
+		pr_err_ratelimited("Invalid ICSWX return code %x\n", ret);
+		ret = -EPROTO;
+		break;
+	}
+
+	if (!ret)
+		*outlenp = be32_to_cpu(csb->count);
+
+	return ret;
+}
+
+/**
+ * nx842_powernv_compress - Compress data using the 842 algorithm
+ *
+ * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
+ * The input buffer is compressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function *@outlenp contains the length of the
+ * compressed data.  If there is an error then *@outlenp will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: input buffer pointer
+ * @inlen: input buffer size
+ * @out: output buffer pointer
+ * @outlenp: output buffer size pointer
+ * @workmem: working memory buffer pointer, size determined by
+ *           nx842_powernv_driver.workmem_size
+ *
+ * Returns: see @nx842_powernv_function()
+ */
+static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
+				  unsigned char *out, unsigned int *outlenp,
+				  void *wmem)
+{
+	return nx842_powernv_function(in, inlen, out, outlenp,
+				      wmem, CCW_FC_842_COMP_NOCRC);
+}
+
+/**
+ * nx842_powernv_decompress - Decompress data using the 842 algorithm
+ *
+ * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
+ * The input buffer is decompressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function *@outlenp contains the length of the
+ * decompressed data.  If there is an error then *@outlenp will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: input buffer pointer
+ * @inlen: input buffer size
+ * @out: output buffer pointer
+ * @outlenp: output buffer size pointer
+ * @workmem: working memory buffer pointer, size determined by
+ *           nx842_powernv_driver.workmem_size
+ *
+ * Returns: see @nx842_powernv_function()
+ */
+static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
+				    unsigned char *out, unsigned int *outlenp,
+				    void *wmem)
+{
+	return nx842_powernv_function(in, inlen, out, outlenp,
+				      wmem, CCW_FC_842_DECOMP_NOCRC);
+}
+
+static int __init nx842_powernv_probe(struct device_node *dn)
+{
+	struct nx842_coproc *coproc;
+	struct property *ct_prop, *ci_prop;
+	unsigned int ct, ci;
+	int chip_id;
+
+	chip_id = of_get_ibm_chip_id(dn);
+	if (chip_id < 0) {
+		pr_err("ibm,chip-id missing\n");
+		return -EINVAL;
+	}
+	ct_prop = of_find_property(dn, "ibm,842-coprocessor-type", NULL);
+	if (!ct_prop) {
+		pr_err("ibm,842-coprocessor-type missing\n");
+		return -EINVAL;
+	}
+	ct = be32_to_cpu(*(unsigned int *)ct_prop->value);
+	ci_prop = of_find_property(dn, "ibm,842-coprocessor-instance", NULL);
+	if (!ci_prop) {
+		pr_err("ibm,842-coprocessor-instance missing\n");
+		return -EINVAL;
+	}
+	ci = be32_to_cpu(*(unsigned int *)ci_prop->value);
+
+	coproc = kmalloc(sizeof(*coproc), GFP_KERNEL);
+	if (!coproc)
+		return -ENOMEM;
+
+	coproc->chip_id = chip_id;
+	coproc->ct = ct;
+	coproc->ci = ci;
+	INIT_LIST_HEAD(&coproc->list);
+	list_add(&coproc->list, &nx842_coprocs);
+
+	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
+
+	if (!nx842_ct)
+		nx842_ct = ct;
+	else if (nx842_ct != ct)
+		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
+		       chip_id, ct, nx842_ct);
+
+	return 0;
+}
+
+static struct nx842_constraints nx842_powernv_constraints = {
+	.alignment =	DDE_BUFFER_ALIGN,
+	.multiple =	DDE_BUFFER_LAST_MULT,
+	.minimum =	DDE_BUFFER_LAST_MULT,
+	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
+};
+
+static struct nx842_driver nx842_powernv_driver = {
+	.name =		KBUILD_MODNAME,
+	.owner =	THIS_MODULE,
+	.workmem_size =	sizeof(struct nx842_workmem),
+	.constraints =	&nx842_powernv_constraints,
+	.compress =	nx842_powernv_compress,
+	.decompress =	nx842_powernv_decompress,
+};
+
+static __init int nx842_powernv_init(void)
+{
+	struct device_node *dn;
+
+	/* verify workmem size/align restrictions */
+	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
+	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
+	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
+	/* verify buffer size/align restrictions */
+	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
+	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
+	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
+
+	pr_info("loading\n");
+
+	for_each_compatible_node(dn, NULL, "ibm,power-nx")
+		nx842_powernv_probe(dn);
+
+	if (!nx842_ct) {
+		pr_err("no coprocessors found\n");
+		return -ENODEV;
+	}
+
+	if (!nx842_platform_driver_set(&nx842_powernv_driver)) {
+		struct nx842_coproc *coproc, *n;
+
+		list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
+			list_del(&coproc->list);
+			kfree(coproc);
+		}
+
+		return -EEXIST;
+	}
+
+	pr_info("loaded\n");
+
+	return 0;
+}
+module_init(nx842_powernv_init);
+
+static void __exit nx842_powernv_exit(void)
+{
+	struct nx842_coproc *coproc, *n;
+
+	nx842_platform_driver_unset(&nx842_powernv_driver);
+
+	list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
+		list_del(&coproc->list);
+		kfree(coproc);
+	}
+
+	pr_info("unloaded\n");
+}
+module_exit(nx842_powernv_exit);
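
wait_for_csb() above is a bounded busy-wait: spin on the hardware-written
valid bit with cpu_relax(), give up at a ktime deadline, then use a barrier
before trusting the rest of the CSB and the output buffer. The idiom in
isolation (a sketch; the flag pointer, valid bit, and timeout are
illustrative stand-ins, and unlike wait_for_csb() it does not recheck the
flag after the deadline):

#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/errno.h>
#include <linux/compiler.h>
#include <asm/processor.h>

static int poll_hw_flag(volatile u8 *flag, u8 valid_bit, unsigned int ms)
{
	ktime_t deadline = ktime_add_ms(ktime_get(), ms);

	while (!(ACCESS_ONCE(*flag) & valid_bit)) {
		cpu_relax();
		if (ktime_after(ktime_get(), deadline))
			return -ETIMEDOUT;
	}
	barrier();	/* hw has now updated the status block and output */
	return 0;
}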

Some files were not shown in this diff comparison because too many files have changed.