浏览代码

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Enforce the setting of keys for keyed aead/hash/skcipher
     algorithms.
   - Add multibuf speed tests in tcrypt.

  Algorithms:
   - Improve performance of sha3-generic.
   - Add native sha512 support on arm64.
   - Add v8.2 Crypto Extentions version of sha3/sm3 on arm64.
   - Avoid hmac nesting by requiring underlying algorithm to be unkeyed.
   - Add cryptd_max_cpu_qlen module parameter to cryptd.

  Drivers:
   - Add support for EIP97 engine in inside-secure.
   - Add inline IPsec support to chelsio.
   - Add RevB core support to crypto4xx.
   - Fix AEAD ICV check in crypto4xx.
   - Add stm32 crypto driver.
   - Add support for BCM63xx platforms in bcm2835 and remove bcm63xx.
   - Add Derived Key Protocol (DKP) support in caam.
   - Add Samsung Exynos True RNG driver.
   - Add support for Exynos5250+ SoCs in exynos PRNG driver"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (166 commits)
  crypto: picoxcell - Fix error handling in spacc_probe()
  crypto: arm64/sha512 - fix/improve new v8.2 Crypto Extensions code
  crypto: arm64/sm3 - new v8.2 Crypto Extensions implementation
  crypto: arm64/sha3 - new v8.2 Crypto Extensions implementation
  crypto: testmgr - add new testcases for sha3
  crypto: sha3-generic - export init/update/final routines
  crypto: sha3-generic - simplify code
  crypto: sha3-generic - rewrite KECCAK transform to help the compiler optimize
  crypto: sha3-generic - fixes for alignment and big endian operation
  crypto: aesni - handle zero length dst buffer
  crypto: artpec6 - remove select on non-existing CRYPTO_SHA384
  hwrng: bcm2835 - Remove redundant dev_err call in bcm2835_rng_probe()
  crypto: stm32 - remove redundant dev_err call in stm32_cryp_probe()
  crypto: axis - remove unnecessary platform_get_resource() error check
  crypto: testmgr - test misuse of result in ahash
  crypto: inside-secure - make function safexcel_try_push_requests static
  crypto: aes-generic - fix aes-generic regression on powerpc
  crypto: chelsio - Fix indentation warning
  crypto: arm64/sha1-ce - get rid of literal pool
  crypto: arm64/sha2-ce - move the round constant table to .rodata section
  ...
Linus Torvalds 7 年之前
父节点
当前提交
a103950e0d
共有 100 个文件被更改,包括 3910 次插入1697 次删除
  1. 22 0
      Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
  2. 2 1
      Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
  3. 3 1
      Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
  4. 19 0
      Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
  5. 19 3
      Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
  6. 0 17
      Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
  7. 8 0
      MAINTAINERS
  8. 4 6
      arch/arm/crypto/aes-neonbs-glue.c
  9. 2 0
      arch/arm/crypto/crc32-ce-glue.c
  10. 18 0
      arch/arm64/crypto/Kconfig
  11. 10 1
      arch/arm64/crypto/Makefile
  12. 87 0
      arch/arm64/crypto/aes-ce-core.S
  13. 12 103
      arch/arm64/crypto/aes-ce-glue.c
  14. 10 9
      arch/arm64/crypto/aes-cipher-core.S
  15. 1 0
      arch/arm64/crypto/aes-glue.c
  16. 4 4
      arch/arm64/crypto/aes-neon.S
  17. 4 3
      arch/arm64/crypto/crc32-ce-core.S
  18. 2 0
      arch/arm64/crypto/crc32-ce-glue.c
  19. 9 8
      arch/arm64/crypto/crct10dif-ce-core.S
  20. 9 11
      arch/arm64/crypto/sha1-ce-core.S
  21. 3 1
      arch/arm64/crypto/sha2-ce-core.S
  22. 210 0
      arch/arm64/crypto/sha3-ce-core.S
  23. 161 0
      arch/arm64/crypto/sha3-ce-glue.c
  24. 204 0
      arch/arm64/crypto/sha512-ce-core.S
  25. 119 0
      arch/arm64/crypto/sha512-ce-glue.c
  26. 1 0
      arch/arm64/crypto/sha512-glue.c
  27. 141 0
      arch/arm64/crypto/sm3-ce-core.S
  28. 92 0
      arch/arm64/crypto/sm3-ce-glue.c
  29. 1 0
      arch/powerpc/crypto/crc32c-vpmsum_glue.c
  30. 3 0
      arch/s390/crypto/crc32-vx.c
  31. 1 0
      arch/sparc/crypto/crc32c_glue.c
  32. 57 142
      arch/x86/crypto/aesni-intel_asm.S
  33. 56 14
      arch/x86/crypto/aesni-intel_glue.c
  34. 0 1
      arch/x86/crypto/chacha20_glue.c
  35. 1 0
      arch/x86/crypto/crc32-pclmul_glue.c
  36. 1 0
      arch/x86/crypto/crc32c-intel_glue.c
  37. 0 2
      arch/x86/crypto/poly1305_glue.c
  38. 4 180
      arch/x86/crypto/salsa20-i586-asm_32.S
  39. 0 114
      arch/x86/crypto/salsa20-x86_64-asm_64.S
  40. 40 65
      arch/x86/crypto/salsa20_glue.c
  41. 60 52
      arch/x86/crypto/twofish-x86_64-asm_64-3way.S
  42. 3 1
      crypto/Kconfig
  43. 1 0
      crypto/Makefile
  44. 1 4
      crypto/ablk_helper.c
  45. 14 5
      crypto/aead.c
  46. 6 4
      crypto/af_alg.c
  47. 28 5
      crypto/ahash.c
  48. 4 9
      crypto/algapi.c
  49. 5 10
      crypto/algif_aead.c
  50. 11 41
      crypto/algif_hash.c
  51. 13 46
      crypto/algif_skcipher.c
  52. 3 3
      crypto/api.c
  53. 2 2
      crypto/authenc.c
  54. 2 2
      crypto/authencesn.c
  55. 0 1
      crypto/blkcipher.c
  56. 1 2
      crypto/camellia_generic.c
  57. 1 2
      crypto/cast5_generic.c
  58. 1 2
      crypto/cast6_generic.c
  59. 13 20
      crypto/chacha20_generic.c
  60. 1 0
      crypto/crc32_generic.c
  61. 1 0
      crypto/crc32c_generic.c
  62. 10 7
      crypto/cryptd.c
  63. 2 2
      crypto/crypto_user.c
  64. 1 1
      crypto/ecc.c
  65. 0 5
      crypto/echainiv.c
  66. 2 2
      crypto/gcm.c
  67. 0 2
      crypto/gf128mul.c
  68. 0 6
      crypto/ghash-generic.c
  69. 2 6
      crypto/internal.h
  70. 2 2
      crypto/keywrap.c
  71. 5 6
      crypto/mcryptd.c
  72. 9 18
      crypto/poly1305_generic.c
  73. 1 1
      crypto/proc.c
  74. 101 139
      crypto/salsa20_generic.c
  75. 0 5
      crypto/seqiv.c
  76. 164 168
      crypto/sha3_generic.c
  77. 21 4
      crypto/shash.c
  78. 1 3
      crypto/simd.c
  79. 26 4
      crypto/skcipher.c
  80. 980 105
      crypto/tcrypt.c
  81. 40 1
      crypto/testmgr.c
  82. 550 0
      crypto/testmgr.h
  83. 2 3
      crypto/twofish_common.c
  84. 2 3
      crypto/twofish_generic.c
  85. 1 2
      crypto/xcbc.c
  86. 16 16
      drivers/char/hw_random/Kconfig
  87. 1 1
      drivers/char/hw_random/Makefile
  88. 117 52
      drivers/char/hw_random/bcm2835-rng.c
  89. 0 154
      drivers/char/hw_random/bcm63xx-rng.c
  90. 4 0
      drivers/char/hw_random/core.c
  91. 235 0
      drivers/char/hw_random/exynos-trng.c
  92. 3 10
      drivers/char/hw_random/imx-rngc.c
  93. 1 0
      drivers/char/hw_random/mtk-rng.c
  94. 12 12
      drivers/char/random.c
  95. 0 1
      drivers/crypto/Kconfig
  96. 1 5
      drivers/crypto/amcc/crypto4xx_alg.c
  97. 82 49
      drivers/crypto/amcc/crypto4xx_core.c
  98. 1 3
      drivers/crypto/amcc/crypto4xx_core.h
  99. 3 1
      drivers/crypto/amcc/crypto4xx_reg_def.h
  100. 1 1
      drivers/crypto/amcc/crypto4xx_trng.c

+ 22 - 0
Documentation/devicetree/bindings/crypto/arm-cryptocell.txt

@@ -0,0 +1,22 @@
+Arm TrustZone CryptoCell cryptographic engine
+
+Required properties:
+- compatible: Should be "arm,cryptocell-712-ree".
+- reg: Base physical address of the engine and length of memory mapped region.
+- interrupts: Interrupt number for the device.
+
+Optional properties:
+- interrupt-parent: The phandle for the interrupt controller that services
+  interrupts for this device.
+- clocks: Reference to the crypto engine clock.
+- dma-coherent: Present if dma operations are coherent.
+
+Examples:
+
+       arm_cc712: crypto@80000000 {
+               compatible = "arm,cryptocell-712-ree";
+               interrupt-parent = <&intc>;
+               interrupts = < 0 30 4 >;
+               reg = < 0x80000000 0x10000 >;
+
+       };

+ 2 - 1
Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt

@@ -1,7 +1,8 @@
 Inside Secure SafeXcel cryptographic engine
 
 Required properties:
-- compatible: Should be "inside-secure,safexcel-eip197".
+- compatible: Should be "inside-secure,safexcel-eip197" or
+              "inside-secure,safexcel-eip97".
 - reg: Base physical address of the engine and length of memory mapped region.
 - interrupts: Interrupt numbers for the rings and engine.
 - interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".

+ 3 - 1
Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt

@@ -2,7 +2,9 @@ Exynos Pseudo Random Number Generator
 
 Required properties:
 
-- compatible  : Should be "samsung,exynos4-rng".
+- compatible  : One of:
+                - "samsung,exynos4-rng" for Exynos4210 and Exynos4412
+                - "samsung,exynos5250-prng" for Exynos5250+
 - reg         : Specifies base physical address and size of the registers map.
 - clocks      : Phandle to clock-controller plus clock-specifier pair.
 - clock-names : "secss" as a clock name.

+ 19 - 0
Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt

@@ -0,0 +1,19 @@
+* STMicroelectronics STM32 CRYP
+
+Required properties:
+- compatible: Should be "st,stm32f756-cryp".
+- reg: The address and length of the peripheral registers space
+- clocks: The input clock of the CRYP instance
+- interrupts: The CRYP interrupt
+
+Optional properties:
+- resets: The input reset of the CRYP instance
+
+Example:
+crypto@50060000 {
+	compatible = "st,stm32f756-cryp";
+	reg = <0x50060000 0x400>;
+	interrupts = <79>;
+	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
+	resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
+};

+ 19 - 3
Documentation/devicetree/bindings/rng/brcm,bcm2835.txt

@@ -1,11 +1,19 @@
-BCM2835 Random number generator
+BCM2835/6368 Random number generator
 
 Required properties:
 
-- compatible : should be "brcm,bcm2835-rng"  or "brcm,bcm-nsp-rng" or
-  "brcm,bcm5301x-rng"
+- compatible : should be one of
+	"brcm,bcm2835-rng"
+	"brcm,bcm-nsp-rng"
+	"brcm,bcm5301x-rng" or
+	"brcm,bcm6368-rng"
 - reg : Specifies base physical address and size of the registers.
 
+Optional properties:
+
+- clocks : phandle to clock-controller plus clock-specifier pair
+- clock-names : "ipsec" as a clock name
+
 Example:
 
 rng {
@@ -17,3 +25,11 @@ rng@18033000 {
 	compatible = "brcm,bcm-nsp-rng";
 	reg = <0x18033000 0x14>;
 };
+
+random: rng@10004180 {
+	compatible = "brcm,bcm6368-rng";
+	reg = <0x10004180 0x14>;
+
+	clocks = <&periph_clk 18>;
+	clock-names = "ipsec";
+};

+ 0 - 17
Documentation/devicetree/bindings/rng/brcm,bcm6368.txt

@@ -1,17 +0,0 @@
-BCM6368 Random number generator
-
-Required properties:
-
-- compatible : should be "brcm,bcm6368-rng"
-- reg : Specifies base physical address and size of the registers
-- clocks : phandle to clock-controller plus clock-specifier pair
-- clock-names : "ipsec" as a clock name
-
-Example:
-	random: rng@10004180 {
-		compatible = "brcm,bcm6368-rng";
-		reg = <0x10004180 0x14>;
-
-		clocks = <&periph_clk 18>;
-		clock-names = "ipsec";
-	};

+ 8 - 0
MAINTAINERS

@@ -11964,6 +11964,13 @@ S:	Maintained
 F:	drivers/crypto/exynos-rng.c
 F:	Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
 
+SAMSUNG EXYNOS TRUE RANDOM NUMBER GENERATOR (TRNG) DRIVER
+M:	Łukasz Stelmach <l.stelmach@samsung.com>
+L:	linux-samsung-soc@vger.kernel.org
+S:	Maintained
+F:	drivers/char/hw_random/exynos-trng.c
+F:	Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt
+
 SAMSUNG FRAMEBUFFER DRIVER
 M:	Jingoo Han <jingoohan1@gmail.com>
 L:	linux-fbdev@vger.kernel.org
@@ -12026,6 +12033,7 @@ F:	drivers/media/i2c/s5k5baf.c
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
 M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Vladimir Zapolskiy <vz@mleia.com>
+M:	Kamil Konieczny <k.konieczny@partner.samsung.com>
 L:	linux-crypto@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained

+ 4 - 6
arch/arm/crypto/aes-neonbs-glue.c

@@ -181,9 +181,8 @@ static int cbc_init(struct crypto_tfm *tfm)
 	struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->enc_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->enc_tfm))
-		return PTR_ERR(ctx->enc_tfm);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(ctx->enc_tfm);
 }
 
 static void cbc_exit(struct crypto_tfm *tfm)
@@ -258,9 +257,8 @@ static int xts_init(struct crypto_tfm *tfm)
 	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->tweak_tfm))
-		return PTR_ERR(ctx->tweak_tfm);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
 }
 
 static void xts_exit(struct crypto_tfm *tfm)

+ 2 - 0
arch/arm/crypto/crc32-ce-glue.c

@@ -188,6 +188,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32",
 	.base.cra_driver_name	= "crc32-arm-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 }, {
@@ -203,6 +204,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32c",
 	.base.cra_driver_name	= "crc32c-arm-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 } };

+ 18 - 0
arch/arm64/crypto/Kconfig

@@ -29,6 +29,24 @@ config CRYPTO_SHA2_ARM64_CE
 	select CRYPTO_HASH
 	select CRYPTO_SHA256_ARM64
 
+config CRYPTO_SHA512_ARM64_CE
+	tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SHA512_ARM64
+
+config CRYPTO_SHA3_ARM64
+	tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SHA3
+
+config CRYPTO_SM3_ARM64_CE
+	tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SM3
+
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON

+ 10 - 1
arch/arm64/crypto/Makefile

@@ -14,6 +14,15 @@ sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o
 obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
 sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 
+obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o
+sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
+sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
+
+obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
+sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
+
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
@@ -24,7 +33,7 @@ obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
 crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
-CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
 aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o

+ 87 - 0
arch/arm64/crypto/aes-ce-core.S

@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.arch		armv8-a+crypto
+
+ENTRY(__aes_ce_encrypt)
+	sub		w3, w3, #2
+	ld1		{v0.16b}, [x2]
+	ld1		{v1.4s}, [x0], #16
+	cmp		w3, #10
+	bmi		0f
+	bne		3f
+	mov		v3.16b, v1.16b
+	b		2f
+0:	mov		v2.16b, v1.16b
+	ld1		{v3.4s}, [x0], #16
+1:	aese		v0.16b, v2.16b
+	aesmc		v0.16b, v0.16b
+2:	ld1		{v1.4s}, [x0], #16
+	aese		v0.16b, v3.16b
+	aesmc		v0.16b, v0.16b
+3:	ld1		{v2.4s}, [x0], #16
+	subs		w3, w3, #3
+	aese		v0.16b, v1.16b
+	aesmc		v0.16b, v0.16b
+	ld1		{v3.4s}, [x0], #16
+	bpl		1b
+	aese		v0.16b, v2.16b
+	eor		v0.16b, v0.16b, v3.16b
+	st1		{v0.16b}, [x1]
+	ret
+ENDPROC(__aes_ce_encrypt)
+
+ENTRY(__aes_ce_decrypt)
+	sub		w3, w3, #2
+	ld1		{v0.16b}, [x2]
+	ld1		{v1.4s}, [x0], #16
+	cmp		w3, #10
+	bmi		0f
+	bne		3f
+	mov		v3.16b, v1.16b
+	b		2f
+0:	mov		v2.16b, v1.16b
+	ld1		{v3.4s}, [x0], #16
+1:	aesd		v0.16b, v2.16b
+	aesimc		v0.16b, v0.16b
+2:	ld1		{v1.4s}, [x0], #16
+	aesd		v0.16b, v3.16b
+	aesimc		v0.16b, v0.16b
+3:	ld1		{v2.4s}, [x0], #16
+	subs		w3, w3, #3
+	aesd		v0.16b, v1.16b
+	aesimc		v0.16b, v0.16b
+	ld1		{v3.4s}, [x0], #16
+	bpl		1b
+	aesd		v0.16b, v2.16b
+	eor		v0.16b, v0.16b, v3.16b
+	st1		{v0.16b}, [x1]
+	ret
+ENDPROC(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ *                  substitution on each byte in 'input'
+ */
+ENTRY(__aes_ce_sub)
+	dup		v1.4s, w0
+	movi		v0.16b, #0
+	aese		v0.16b, v1.16b
+	umov		w0, v0.s[0]
+	ret
+ENDPROC(__aes_ce_sub)
+
+ENTRY(__aes_ce_invert)
+	ld1		{v0.4s}, [x1]
+	aesimc		v1.16b, v0.16b
+	st1		{v1.4s}, [x0]
+	ret
+ENDPROC(__aes_ce_invert)

+ 12 - 103
arch/arm64/crypto/aes-ce-cipher.c → arch/arm64/crypto/aes-ce-glue.c

@@ -29,6 +29,13 @@ struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
 
+asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+asmlinkage u32 __aes_ce_sub(u32 l);
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+				const struct aes_block *in);
+
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
 	/*
@@ -44,10 +51,6 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
 static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct aes_block *out = (struct aes_block *)dst;
-	struct aes_block const *in = (struct aes_block *)src;
-	void *dummy0;
-	int dummy1;
 
 	if (!may_use_simd()) {
 		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
@@ -55,49 +58,13 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	}
 
 	kernel_neon_begin();
-
-	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.4s}, [%[key]], #16		;"
-		"	cmp	%w[rounds], #10			;"
-		"	bmi	0f				;"
-		"	bne	3f				;"
-		"	mov	v3.16b, v1.16b			;"
-		"	b	2f				;"
-		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"1:	aese	v0.16b, v2.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.4s}, [%[key]], #16		;"
-		"	aese	v0.16b, v3.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.4s}, [%[key]], #16		;"
-		"	subs	%w[rounds], %w[rounds], #3	;"
-		"	aese	v0.16b, v1.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"	bpl	1b				;"
-		"	aese	v0.16b, v2.16b			;"
-		"	eor	v0.16b, v0.16b, v3.16b		;"
-		"	st1	{v0.16b}, %[out]		;"
-
-	:	[out]		"=Q"(*out),
-		[key]		"=r"(dummy0),
-		[rounds]	"=r"(dummy1)
-	:	[in]		"Q"(*in),
-				"1"(ctx->key_enc),
-				"2"(num_rounds(ctx) - 2)
-	:	"cc");
-
+	__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
 	kernel_neon_end();
 }
 
 static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct aes_block *out = (struct aes_block *)dst;
-	struct aes_block const *in = (struct aes_block *)src;
-	void *dummy0;
-	int dummy1;
 
 	if (!may_use_simd()) {
 		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
@@ -105,62 +72,10 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	}
 
 	kernel_neon_begin();
-
-	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.4s}, [%[key]], #16		;"
-		"	cmp	%w[rounds], #10			;"
-		"	bmi	0f				;"
-		"	bne	3f				;"
-		"	mov	v3.16b, v1.16b			;"
-		"	b	2f				;"
-		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"1:	aesd	v0.16b, v2.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.4s}, [%[key]], #16		;"
-		"	aesd	v0.16b, v3.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.4s}, [%[key]], #16		;"
-		"	subs	%w[rounds], %w[rounds], #3	;"
-		"	aesd	v0.16b, v1.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"	bpl	1b				;"
-		"	aesd	v0.16b, v2.16b			;"
-		"	eor	v0.16b, v0.16b, v3.16b		;"
-		"	st1	{v0.16b}, %[out]		;"
-
-	:	[out]		"=Q"(*out),
-		[key]		"=r"(dummy0),
-		[rounds]	"=r"(dummy1)
-	:	[in]		"Q"(*in),
-				"1"(ctx->key_dec),
-				"2"(num_rounds(ctx) - 2)
-	:	"cc");
-
+	__aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
 	kernel_neon_end();
 }
 
-/*
- * aes_sub() - use the aese instruction to perform the AES sbox substitution
- *             on each byte in 'input'
- */
-static u32 aes_sub(u32 input)
-{
-	u32 ret;
-
-	__asm__("dup	v1.4s, %w[in]		;"
-		"movi	v0.16b, #0		;"
-		"aese	v0.16b, v1.16b		;"
-		"umov	%w[out], v0.4s[0]	;"
-
-	:	[out]	"=r"(ret)
-	:	[in]	"r"(input)
-	:		"v0","v1");
-
-	return ret;
-}
-
 int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		     unsigned int key_len)
 {
@@ -189,7 +104,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+		rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -202,7 +117,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		} else if (key_len == AES_KEYSIZE_256) {
 			if (i >= 6)
 				break;
-			rko[4] = aes_sub(rko[3]) ^ rki[4];
+			rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
 			rko[5] = rko[4] ^ rki[5];
 			rko[6] = rko[5] ^ rki[6];
 			rko[7] = rko[6] ^ rki[7];
@@ -221,13 +136,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.4s}, %[in]		;"
-			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.4s}, %[out]	;"
-
-		:	[out]	"=Q"(key_dec[i])
-		:	[in]	"Q"(key_enc[j])
-		:		"v0","v1");
+		__aes_ce_invert(key_dec + i, key_enc + j);
 	key_dec[i] = key_enc[0];
 
 	kernel_neon_end();

+ 10 - 9
arch/arm64/crypto/aes-cipher-core.S

@@ -125,6 +125,16 @@ CPU_BE(	rev		w7, w7		)
 	ret
 	.endm
 
+ENTRY(__aes_arm64_encrypt)
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ENDPROC(__aes_arm64_encrypt)
+
+	.align		5
+ENTRY(__aes_arm64_decrypt)
+	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
+ENDPROC(__aes_arm64_decrypt)
+
+	.section	".rodata", "a"
 	.align		L1_CACHE_SHIFT
 	.type		__aes_arm64_inverse_sbox, %object
 __aes_arm64_inverse_sbox:
@@ -161,12 +171,3 @@ __aes_arm64_inverse_sbox:
 	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
 	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
-
-ENTRY(__aes_arm64_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
-ENDPROC(__aes_arm64_encrypt)
-
-	.align		5
-ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
-ENDPROC(__aes_arm64_decrypt)

+ 1 - 0
arch/arm64/crypto/aes-glue.c

@@ -665,6 +665,7 @@ static int __init aes_init(void)
 
 unregister_simds:
 	aes_exit();
+	return err;
 unregister_ciphers:
 	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 	return err;

+ 4 - 4
arch/arm64/crypto/aes-neon.S

@@ -32,10 +32,10 @@
 
 	/* preload the entire Sbox */
 	.macro		prepare, sbox, shiftrows, temp
-	adr		\temp, \sbox
 	movi		v12.16b, #0x1b
-	ldr		q13, \shiftrows
-	ldr		q14, .Lror32by8
+	ldr_l		q13, \shiftrows, \temp
+	ldr_l		q14, .Lror32by8, \temp
+	adr_l		\temp, \sbox
 	ld1		{v16.16b-v19.16b}, [\temp], #64
 	ld1		{v20.16b-v23.16b}, [\temp], #64
 	ld1		{v24.16b-v27.16b}, [\temp], #64
@@ -272,7 +272,7 @@
 
 #include "aes-modes.S"
 
-	.text
+	.section	".rodata", "a"
 	.align		6
 .LForward_Sbox:
 	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5

+ 4 - 3
arch/arm64/crypto/crc32-ce-core.S

@@ -50,7 +50,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	.text
+	.section	".rodata", "a"
 	.align		6
 	.cpu		generic+crypto+crc
 
@@ -115,12 +115,13 @@
 	 * uint crc32_pmull_le(unsigned char const *buffer,
 	 *                     size_t len, uint crc32)
 	 */
+	.text
 ENTRY(crc32_pmull_le)
-	adr		x3, .Lcrc32_constants
+	adr_l		x3, .Lcrc32_constants
 	b		0f
 
 ENTRY(crc32c_pmull_le)
-	adr		x3, .Lcrc32c_constants
+	adr_l		x3, .Lcrc32c_constants
 
 0:	bic		LEN, LEN, #15
 	ld1		{v1.16b-v4.16b}, [BUF], #0x40

+ 2 - 0
arch/arm64/crypto/crc32-ce-glue.c

@@ -185,6 +185,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32",
 	.base.cra_driver_name	= "crc32-arm64-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 }, {
@@ -200,6 +201,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 	.base.cra_name		= "crc32c",
 	.base.cra_driver_name	= "crc32c-arm64-ce",
 	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 	.base.cra_blocksize	= 1,
 	.base.cra_module	= THIS_MODULE,
 } };

+ 9 - 8
arch/arm64/crypto/crct10dif-ce-core.S

@@ -128,7 +128,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	// XOR the initial_crc value
 	eor		v0.16b, v0.16b, v10.16b
 
-	ldr		q10, rk3	// xmm10 has rk3 and rk4
+	ldr_l		q10, rk3, x8	// xmm10 has rk3 and rk4
 					// type of pmull instruction
 					// will determine which constant to use
 
@@ -184,13 +184,13 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 	// fold the 8 vector registers to 1 vector register with different
 	// constants
 
-	ldr		q10, rk9
+	ldr_l		q10, rk9, x8
 
 	.macro		fold16, reg, rk
 	pmull		v8.1q, \reg\().1d, v10.1d
 	pmull2		\reg\().1q, \reg\().2d, v10.2d
 	.ifnb		\rk
-	ldr		q10, \rk
+	ldr_l		q10, \rk, x8
 	.endif
 	eor		v7.16b, v7.16b, v8.16b
 	eor		v7.16b, v7.16b, \reg\().16b
@@ -251,7 +251,7 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 
 	// get rid of the extra data that was loaded before
 	// load the shift constant
-	adr		x4, tbl_shf_table + 16
+	adr_l		x4, tbl_shf_table + 16
 	sub		x4, x4, arg3
 	ld1		{v0.16b}, [x4]
 
@@ -275,7 +275,7 @@ CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
 
 _128_done:
 	// compute crc of a 128-bit value
-	ldr		q10, rk5		// rk5 and rk6 in xmm10
+	ldr_l		q10, rk5, x8		// rk5 and rk6 in xmm10
 
 	// 64b fold
 	ext		v0.16b, vzr.16b, v7.16b, #8
@@ -291,7 +291,7 @@ _128_done:
 
 	// barrett reduction
 _barrett:
-	ldr		q10, rk7
+	ldr_l		q10, rk7, x8
 	mov		v0.d[0], v7.d[1]
 
 	pmull		v0.1q, v0.1d, v10.1d
@@ -321,7 +321,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	b.eq		_128_done		// exactly 16 left
 	b.lt		_less_than_16_left
 
-	ldr		q10, rk1		// rk1 and rk2 in xmm10
+	ldr_l		q10, rk1, x8		// rk1 and rk2 in xmm10
 
 	// update the counter. subtract 32 instead of 16 to save one
 	// instruction from the loop
@@ -333,7 +333,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 
 _less_than_16_left:
 	// shl r9, 4
-	adr		x0, tbl_shf_table + 16
+	adr_l		x0, tbl_shf_table + 16
 	sub		x0, x0, arg3
 	ld1		{v0.16b}, [x0]
 	movi		v9.16b, #0x80
@@ -345,6 +345,7 @@ ENDPROC(crc_t10dif_pmull)
 // precomputed constants
 // these constants are precomputed from the poly:
 // 0x8bb70000 (0x8bb7 scaled to 32 bits)
+	.section	".rodata", "a"
 	.align		4
 // Q = 0x18BB70000
 // rk1 = 2^(32*3) mod Q << 32

+ 9 - 11
arch/arm64/crypto/sha1-ce-core.S

@@ -58,12 +58,11 @@
 	sha1su1		v\s0\().4s, v\s3\().4s
 	.endm
 
-	/*
-	 * The SHA1 round constants
-	 */
-	.align		4
-.Lsha1_rcon:
-	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
+	.macro		loadrc, k, val, tmp
+	movz		\tmp, :abs_g0_nc:\val
+	movk		\tmp, :abs_g1:\val
+	dup		\k, \tmp
+	.endm
 
 	/*
 	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
@@ -71,11 +70,10 @@
 	 */
 ENTRY(sha1_ce_transform)
 	/* load round constants */
-	adr		x6, .Lsha1_rcon
-	ld1r		{k0.4s}, [x6], #4
-	ld1r		{k1.4s}, [x6], #4
-	ld1r		{k2.4s}, [x6], #4
-	ld1r		{k3.4s}, [x6]
+	loadrc		k0.4s, 0x5a827999, w6
+	loadrc		k1.4s, 0x6ed9eba1, w6
+	loadrc		k2.4s, 0x8f1bbcdc, w6
+	loadrc		k3.4s, 0xca62c1d6, w6
 
 	/* load state */
 	ld1		{dgav.4s}, [x0]

+ 3 - 1
arch/arm64/crypto/sha2-ce-core.S

@@ -53,6 +53,7 @@
 	/*
 	 * The SHA-256 round constants
 	 */
+	.section	".rodata", "a"
 	.align		4
 .Lsha2_rcon:
 	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
@@ -76,9 +77,10 @@
 	 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 	 *			  int blocks)
 	 */
+	.text
 ENTRY(sha2_ce_transform)
 	/* load round constants */
-	adr		x8, .Lsha2_rcon
+	adr_l		x8, .Lsha2_rcon
 	ld1		{ v0.4s- v3.4s}, [x8], #64
 	ld1		{ v4.4s- v7.4s}, [x8], #64
 	ld1		{ v8.4s-v11.4s}, [x8], #64

+ 210 - 0
arch/arm64/crypto/sha3-ce-core.S

@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+	.set	.Lv\b\().2d, \b
+	.set	.Lv\b\().16b, \b
+	.endr
+
+	/*
+	 * ARMv8.2 Crypto Extensions instructions
+	 */
+	.macro	eor3, rd, rn, rm, ra
+	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+	.endm
+
+	.macro	rax1, rd, rn, rm
+	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro	bcax, rd, rn, rm, ra
+	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+	.endm
+
+	.macro	xar, rd, rn, rm, imm6
+	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
+	.endm
+
+	/*
+	 * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
+	 */
+	.text
+ENTRY(sha3_ce_transform)
+	/* load state */
+	add	x8, x0, #32
+	ld1	{ v0.1d- v3.1d}, [x0]
+	ld1	{ v4.1d- v7.1d}, [x8], #32
+	ld1	{ v8.1d-v11.1d}, [x8], #32
+	ld1	{v12.1d-v15.1d}, [x8], #32
+	ld1	{v16.1d-v19.1d}, [x8], #32
+	ld1	{v20.1d-v23.1d}, [x8], #32
+	ld1	{v24.1d}, [x8]
+
+0:	sub	w2, w2, #1
+	mov	w8, #24
+	adr_l	x9, .Lsha3_rcon
+
+	/* load input */
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v29.8b-v31.8b}, [x1], #24
+	eor	v0.8b, v0.8b, v25.8b
+	eor	v1.8b, v1.8b, v26.8b
+	eor	v2.8b, v2.8b, v27.8b
+	eor	v3.8b, v3.8b, v28.8b
+	eor	v4.8b, v4.8b, v29.8b
+	eor	v5.8b, v5.8b, v30.8b
+	eor	v6.8b, v6.8b, v31.8b
+
+	tbnz	x3, #6, 2f		// SHA3-512
+
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v29.8b-v30.8b}, [x1], #16
+	eor	 v7.8b,  v7.8b, v25.8b
+	eor	 v8.8b,  v8.8b, v26.8b
+	eor	 v9.8b,  v9.8b, v27.8b
+	eor	v10.8b, v10.8b, v28.8b
+	eor	v11.8b, v11.8b, v29.8b
+	eor	v12.8b, v12.8b, v30.8b
+
+	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224
+
+	// SHA3-256
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	eor	v13.8b, v13.8b, v25.8b
+	eor	v14.8b, v14.8b, v26.8b
+	eor	v15.8b, v15.8b, v27.8b
+	eor	v16.8b, v16.8b, v28.8b
+	b	3f
+
+1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384
+
+	// SHA3-224
+	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v29.8b}, [x1], #8
+	eor	v13.8b, v13.8b, v25.8b
+	eor	v14.8b, v14.8b, v26.8b
+	eor	v15.8b, v15.8b, v27.8b
+	eor	v16.8b, v16.8b, v28.8b
+	eor	v17.8b, v17.8b, v29.8b
+	b	3f
+
+	// SHA3-512
+2:	ld1	{v25.8b-v26.8b}, [x1], #16
+	eor	 v7.8b,  v7.8b, v25.8b
+	eor	 v8.8b,  v8.8b, v26.8b
+
+3:	sub	w8, w8, #1
+
+	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
+	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
+	eor3	v28.16b,  v3.16b,  v8.16b, v13.16b
+	eor3	v25.16b,  v0.16b,  v5.16b, v10.16b
+	eor3	v27.16b,  v2.16b,  v7.16b, v12.16b
+	eor3	v29.16b, v29.16b, v19.16b, v24.16b
+	eor3	v26.16b, v26.16b, v16.16b, v21.16b
+	eor3	v28.16b, v28.16b, v18.16b, v23.16b
+	eor3	v25.16b, v25.16b, v15.16b, v20.16b
+	eor3	v27.16b, v27.16b, v17.16b, v22.16b
+
+	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
+	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
+	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
+	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
+	rax1	v27.2d, v27.2d, v29.2d	// bc[3]
+
+	eor	 v0.16b,  v0.16b, v30.16b
+	xar	 v29.2d,   v1.2d,  v25.2d, (64 - 1)
+	xar	  v1.2d,   v6.2d,  v25.2d, (64 - 44)
+	xar	  v6.2d,   v9.2d,  v28.2d, (64 - 20)
+	xar	  v9.2d,  v22.2d,  v26.2d, (64 - 61)
+	xar	 v22.2d,  v14.2d,  v28.2d, (64 - 39)
+	xar	 v14.2d,  v20.2d,  v30.2d, (64 - 18)
+	xar	 v31.2d,   v2.2d,  v26.2d, (64 - 62)
+	xar	  v2.2d,  v12.2d,  v26.2d, (64 - 43)
+	xar	 v12.2d,  v13.2d,  v27.2d, (64 - 25)
+	xar	 v13.2d,  v19.2d,  v28.2d, (64 - 8)
+	xar	 v19.2d,  v23.2d,  v27.2d, (64 - 56)
+	xar	 v23.2d,  v15.2d,  v30.2d, (64 - 41)
+	xar	 v15.2d,   v4.2d,  v28.2d, (64 - 27)
+	xar	 v28.2d,  v24.2d,  v28.2d, (64 - 14)
+	xar	 v24.2d,  v21.2d,  v25.2d, (64 - 2)
+	xar	  v8.2d,   v8.2d,  v27.2d, (64 - 55)
+	xar	  v4.2d,  v16.2d,  v25.2d, (64 - 45)
+	xar	 v16.2d,   v5.2d,  v30.2d, (64 - 36)
+	xar	  v5.2d,   v3.2d,  v27.2d, (64 - 28)
+	xar	 v27.2d,  v18.2d,  v27.2d, (64 - 21)
+	xar	  v3.2d,  v17.2d,  v26.2d, (64 - 15)
+	xar	 v25.2d,  v11.2d,  v25.2d, (64 - 10)
+	xar	 v26.2d,   v7.2d,  v26.2d, (64 - 6)
+	xar	 v30.2d,  v10.2d,  v30.2d, (64 - 3)
+
+	bcax	v20.16b, v31.16b, v22.16b,  v8.16b
+	bcax	v21.16b,  v8.16b, v23.16b, v22.16b
+	bcax	v22.16b, v22.16b, v24.16b, v23.16b
+	bcax	v23.16b, v23.16b, v31.16b, v24.16b
+	bcax	v24.16b, v24.16b,  v8.16b, v31.16b
+
+	ld1r	{v31.2d}, [x9], #8
+
+	bcax	v17.16b, v25.16b, v19.16b,  v3.16b
+	bcax	v18.16b,  v3.16b, v15.16b, v19.16b
+	bcax	v19.16b, v19.16b, v16.16b, v15.16b
+	bcax	v15.16b, v15.16b, v25.16b, v16.16b
+	bcax	v16.16b, v16.16b,  v3.16b, v25.16b
+
+	bcax	v10.16b, v29.16b, v12.16b, v26.16b
+	bcax	v11.16b, v26.16b, v13.16b, v12.16b
+	bcax	v12.16b, v12.16b, v14.16b, v13.16b
+	bcax	v13.16b, v13.16b, v29.16b, v14.16b
+	bcax	v14.16b, v14.16b, v26.16b, v29.16b
+
+	bcax	 v7.16b, v30.16b,  v9.16b,  v4.16b
+	bcax	 v8.16b,  v4.16b,  v5.16b,  v9.16b
+	bcax	 v9.16b,  v9.16b,  v6.16b,  v5.16b
+	bcax	 v5.16b,  v5.16b, v30.16b,  v6.16b
+	bcax	 v6.16b,  v6.16b,  v4.16b, v30.16b
+
+	bcax	 v3.16b, v27.16b,  v0.16b, v28.16b
+	bcax	 v4.16b, v28.16b,  v1.16b,  v0.16b
+	bcax	 v0.16b,  v0.16b,  v2.16b,  v1.16b
+	bcax	 v1.16b,  v1.16b, v27.16b,  v2.16b
+	bcax	 v2.16b,  v2.16b, v28.16b, v27.16b
+
+	eor	 v0.16b,  v0.16b, v31.16b
+
+	cbnz	w8, 3b
+	cbnz	w2, 0b
+
+	/* save state */
+	st1	{ v0.1d- v3.1d}, [x0], #32
+	st1	{ v4.1d- v7.1d}, [x0], #32
+	st1	{ v8.1d-v11.1d}, [x0], #32
+	st1	{v12.1d-v15.1d}, [x0], #32
+	st1	{v16.1d-v19.1d}, [x0], #32
+	st1	{v20.1d-v23.1d}, [x0], #32
+	st1	{v24.1d}, [x0]
+	ret
+ENDPROC(sha3_ce_transform)
+
+	.section	".rodata", "a"
+	.align		8
+.Lsha3_rcon:
+	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
+	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
+	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
+	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
+	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
+	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
+	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
+	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008

+ 161 - 0
arch/arm64/crypto/sha3-ce-glue.c

@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha3.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
+				  int md_len);
+
+static int sha3_update(struct shash_desc *desc, const u8 *data,
+		       unsigned int len)
+{
+	struct sha3_state *sctx = shash_desc_ctx(desc);
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+
+	if (!may_use_simd())
+		return crypto_sha3_update(desc, data, len);
+
+	if ((sctx->partial + len) >= sctx->rsiz) {
+		int blocks;
+
+		if (sctx->partial) {
+			int p = sctx->rsiz - sctx->partial;
+
+			memcpy(sctx->buf + sctx->partial, data, p);
+			kernel_neon_begin();
+			sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
+			kernel_neon_end();
+
+			data += p;
+			len -= p;
+			sctx->partial = 0;
+		}
+
+		blocks = len / sctx->rsiz;
+		len %= sctx->rsiz;
+
+		if (blocks) {
+			kernel_neon_begin();
+			sha3_ce_transform(sctx->st, data, blocks, digest_size);
+			kernel_neon_end();
+			data += blocks * sctx->rsiz;
+		}
+	}
+
+	if (len) {
+		memcpy(sctx->buf + sctx->partial, data, len);
+		sctx->partial += len;
+	}
+	return 0;
+}
+
+static int sha3_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha3_state *sctx = shash_desc_ctx(desc);
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+	__le64 *digest = (__le64 *)out;
+	int i;
+
+	if (!may_use_simd())
+		return crypto_sha3_final(desc, out);
+
+	sctx->buf[sctx->partial++] = 0x06;
+	memset(sctx->buf + sctx->partial, 0, sctx->rsiz - sctx->partial);
+	sctx->buf[sctx->rsiz - 1] |= 0x80;
+
+	kernel_neon_begin();
+	sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
+	kernel_neon_end();
+
+	for (i = 0; i < digest_size / 8; i++)
+		put_unaligned_le64(sctx->st[i], digest++);
+
+	if (digest_size & 4)
+		put_unaligned_le32(sctx->st[i], (__le32 *)digest);
+
+	*sctx = (struct sha3_state){};
+	return 0;
+}
+
+static struct shash_alg algs[] = { {
+	.digestsize		= SHA3_224_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-224",
+	.base.cra_driver_name	= "sha3-224-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_224_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+}, {
+	.digestsize		= SHA3_256_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-256",
+	.base.cra_driver_name	= "sha3-256-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_256_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+}, {
+	.digestsize		= SHA3_384_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-384",
+	.base.cra_driver_name	= "sha3-384-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_384_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+}, {
+	.digestsize		= SHA3_512_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= sha3_update,
+	.final			= sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-512",
+	.base.cra_driver_name	= "sha3-512-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+} };
+
+static int __init sha3_neon_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha3_neon_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA3, sha3_neon_mod_init);
+module_exit(sha3_neon_mod_fini);

+ 204 - 0
arch/arm64/crypto/sha512-ce-core.S

@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha512-ce-core.S - core SHA-384/SHA-512 transform using v8 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp		b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+	.set		.Lq\b, \b
+	.set		.Lv\b\().2d, \b
+	.endr
+
+	.macro		sha512h, rd, rn, rm
+	.inst		0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sha512h2, rd, rn, rm
+	.inst		0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sha512su0, rd, rn
+	.inst		0xcec08000 | .L\rd | (.L\rn << 5)
+	.endm
+
+	.macro		sha512su1, rd, rn, rm
+	.inst		0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	/*
+	 * The SHA-512 round constants
+	 */
+	.section	".rodata", "a"
+	.align		4
+.Lsha512_rcon:
+	.quad		0x428a2f98d728ae22, 0x7137449123ef65cd
+	.quad		0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
+	.quad		0x3956c25bf348b538, 0x59f111f1b605d019
+	.quad		0x923f82a4af194f9b, 0xab1c5ed5da6d8118
+	.quad		0xd807aa98a3030242, 0x12835b0145706fbe
+	.quad		0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
+	.quad		0x72be5d74f27b896f, 0x80deb1fe3b1696b1
+	.quad		0x9bdc06a725c71235, 0xc19bf174cf692694
+	.quad		0xe49b69c19ef14ad2, 0xefbe4786384f25e3
+	.quad		0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
+	.quad		0x2de92c6f592b0275, 0x4a7484aa6ea6e483
+	.quad		0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
+	.quad		0x983e5152ee66dfab, 0xa831c66d2db43210
+	.quad		0xb00327c898fb213f, 0xbf597fc7beef0ee4
+	.quad		0xc6e00bf33da88fc2, 0xd5a79147930aa725
+	.quad		0x06ca6351e003826f, 0x142929670a0e6e70
+	.quad		0x27b70a8546d22ffc, 0x2e1b21385c26c926
+	.quad		0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
+	.quad		0x650a73548baf63de, 0x766a0abb3c77b2a8
+	.quad		0x81c2c92e47edaee6, 0x92722c851482353b
+	.quad		0xa2bfe8a14cf10364, 0xa81a664bbc423001
+	.quad		0xc24b8b70d0f89791, 0xc76c51a30654be30
+	.quad		0xd192e819d6ef5218, 0xd69906245565a910
+	.quad		0xf40e35855771202a, 0x106aa07032bbd1b8
+	.quad		0x19a4c116b8d2d0c8, 0x1e376c085141ab53
+	.quad		0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
+	.quad		0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
+	.quad		0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
+	.quad		0x748f82ee5defb2fc, 0x78a5636f43172f60
+	.quad		0x84c87814a1f0ab72, 0x8cc702081a6439ec
+	.quad		0x90befffa23631e28, 0xa4506cebde82bde9
+	.quad		0xbef9a3f7b2c67915, 0xc67178f2e372532b
+	.quad		0xca273eceea26619c, 0xd186b8c721c0c207
+	.quad		0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
+	.quad		0x06f067aa72176fba, 0x0a637dc5a2c898a6
+	.quad		0x113f9804bef90dae, 0x1b710b35131c471b
+	.quad		0x28db77f523047d84, 0x32caab7b40c72493
+	.quad		0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
+	.quad		0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
+	.quad		0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+
+	.macro		dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4
+	.ifnb		\rc1
+	ld1		{v\rc1\().2d}, [x4], #16
+	.endif
+	add		v5.2d, v\rc0\().2d, v\in0\().2d
+	ext		v6.16b, v\i2\().16b, v\i3\().16b, #8
+	ext		v5.16b, v5.16b, v5.16b, #8
+	ext		v7.16b, v\i1\().16b, v\i2\().16b, #8
+	add		v\i3\().2d, v\i3\().2d, v5.2d
+	.ifnb		\in1
+	ext		v5.16b, v\in3\().16b, v\in4\().16b, #8
+	sha512su0	v\in0\().2d, v\in1\().2d
+	.endif
+	sha512h		q\i3, q6, v7.2d
+	.ifnb		\in1
+	sha512su1	v\in0\().2d, v\in2\().2d, v5.2d
+	.endif
+	add		v\i4\().2d, v\i1\().2d, v\i3\().2d
+	sha512h2	q\i3, q\i1, v\i0\().2d
+	.endm
+
+	/*
+	 * void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+	 *			  int blocks)
+	 */
+	.text
+ENTRY(sha512_ce_transform)
+	/* load state */
+	ld1		{v8.2d-v11.2d}, [x0]
+
+	/* load first 4 round constants */
+	adr_l		x3, .Lsha512_rcon
+	ld1		{v20.2d-v23.2d}, [x3], #64
+
+	/* load input */
+0:	ld1		{v12.2d-v15.2d}, [x1], #64
+	ld1		{v16.2d-v19.2d}, [x1], #64
+	sub		w2, w2, #1
+
+CPU_LE(	rev64		v12.16b, v12.16b	)
+CPU_LE(	rev64		v13.16b, v13.16b	)
+CPU_LE(	rev64		v14.16b, v14.16b	)
+CPU_LE(	rev64		v15.16b, v15.16b	)
+CPU_LE(	rev64		v16.16b, v16.16b	)
+CPU_LE(	rev64		v17.16b, v17.16b	)
+CPU_LE(	rev64		v18.16b, v18.16b	)
+CPU_LE(	rev64		v19.16b, v19.16b	)
+
+	mov		x4, x3				// rc pointer
+
+	mov		v0.16b, v8.16b
+	mov		v1.16b, v9.16b
+	mov		v2.16b, v10.16b
+	mov		v3.16b, v11.16b
+
+	// v0  ab  cd  --  ef  gh  ab
+	// v1  cd  --  ef  gh  ab  cd
+	// v2  ef  gh  ab  cd  --  ef
+	// v3  gh  ab  cd  --  ef  gh
+	// v4  --  ef  gh  ab  cd  --
+
+	dround		0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17
+	dround		3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18
+	dround		2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19
+	dround		4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12
+	dround		1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13
+
+	dround		0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14
+	dround		3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15
+	dround		2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16
+	dround		4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17
+	dround		1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18
+
+	dround		0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19
+	dround		3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12
+	dround		2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13
+	dround		4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14
+	dround		1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15
+
+	dround		0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16
+	dround		3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17
+	dround		2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18
+	dround		4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19
+	dround		1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12
+
+	dround		0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13
+	dround		3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14
+	dround		2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15
+	dround		4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16
+	dround		1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17
+
+	dround		0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18
+	dround		3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19
+	dround		2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12
+	dround		4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13
+	dround		1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14
+
+	dround		0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15
+	dround		3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16
+	dround		2, 3, 1, 4, 0, 28, 24, 12
+	dround		4, 2, 0, 1, 3, 29, 25, 13
+	dround		1, 4, 3, 0, 2, 30, 26, 14
+
+	dround		0, 1, 2, 3, 4, 31, 27, 15
+	dround		3, 0, 4, 2, 1, 24,   , 16
+	dround		2, 3, 1, 4, 0, 25,   , 17
+	dround		4, 2, 0, 1, 3, 26,   , 18
+	dround		1, 4, 3, 0, 2, 27,   , 19
+
+	/* update state */
+	add		v8.2d, v8.2d, v0.2d
+	add		v9.2d, v9.2d, v1.2d
+	add		v10.2d, v10.2d, v2.2d
+	add		v11.2d, v11.2d, v3.2d
+
+	/* handled all input blocks? */
+	cbnz		w2, 0b
+
+	/* store new state */
+3:	st1		{v8.2d-v11.2d}, [x0]
+	ret
+ENDPROC(sha512_ce_transform)

+ 119 - 0
arch/arm64/crypto/sha512-ce-glue.c

@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+				    int blocks);
+
+asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
+
+static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int len)
+{
+	if (!may_use_simd())
+		return sha512_base_do_update(desc, data, len,
+				(sha512_block_fn *)sha512_block_data_order);
+
+	kernel_neon_begin();
+	sha512_base_do_update(desc, data, len,
+			      (sha512_block_fn *)sha512_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
+			   unsigned int len, u8 *out)
+{
+	if (!may_use_simd()) {
+		if (len)
+			sha512_base_do_update(desc, data, len,
+				(sha512_block_fn *)sha512_block_data_order);
+		sha512_base_do_finalize(desc,
+				(sha512_block_fn *)sha512_block_data_order);
+		return sha512_base_finish(desc, out);
+	}
+
+	kernel_neon_begin();
+	sha512_base_do_update(desc, data, len,
+			      (sha512_block_fn *)sha512_ce_transform);
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
+	kernel_neon_end();
+	return sha512_base_finish(desc, out);
+}
+
+static int sha512_ce_final(struct shash_desc *desc, u8 *out)
+{
+	if (!may_use_simd()) {
+		sha512_base_do_finalize(desc,
+				(sha512_block_fn *)sha512_block_data_order);
+		return sha512_base_finish(desc, out);
+	}
+
+	kernel_neon_begin();
+	sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_ce_transform);
+	kernel_neon_end();
+	return sha512_base_finish(desc, out);
+}
+
+static struct shash_alg algs[] = { {
+	.init			= sha384_base_init,
+	.update			= sha512_ce_update,
+	.final			= sha512_ce_final,
+	.finup			= sha512_ce_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.base.cra_name		= "sha384",
+	.base.cra_driver_name	= "sha384-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.init			= sha512_base_init,
+	.update			= sha512_ce_update,
+	.final			= sha512_ce_final,
+	.finup			= sha512_ce_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.base.cra_name		= "sha512",
+	.base.cra_driver_name	= "sha512-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
+
+static int __init sha512_ce_mod_init(void)
+{
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_ce_mod_fini(void)
+{
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_cpu_feature_match(SHA512, sha512_ce_mod_init);
+module_exit(sha512_ce_mod_fini);

+ 1 - 0
arch/arm64/crypto/sha512-glue.c

@@ -27,6 +27,7 @@ MODULE_ALIAS_CRYPTO("sha512");
 
 asmlinkage void sha512_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
+EXPORT_SYMBOL(sha512_block_data_order);
 
 static int sha512_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)

+ 141 - 0
arch/arm64/crypto/sm3-ce-core.S

@@ -0,0 +1,141 @@
+/*
+ * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+	.set		.Lv\b\().4s, \b
+	.endr
+
+	.macro		sm3partw1, rd, rn, rm
+	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3partw2, rd, rn, rm
+	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3ss1, rd, rn, rm, ra
+	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt1a, rd, rn, rm, imm2
+	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt1b, rd, rn, rm, imm2
+	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt2a, rd, rn, rm, imm2
+	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		sm3tt2b, rd, rn, rm, imm2
+	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
+	.endm
+
+	.macro		round, ab, s0, t0, t1, i
+	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
+	shl		\t1\().4s, \t0\().4s, #1
+	sri		\t1\().4s, \t0\().4s, #31
+	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
+	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
+	.endm
+
+	.macro		qround, ab, s0, s1, s2, s3, s4
+	.ifnb		\s4
+	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
+	ext		v6.16b, \s0\().16b, \s1\().16b, #12
+	ext		v7.16b, \s2\().16b, \s3\().16b, #8
+	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
+	.endif
+
+	eor		v10.16b, \s0\().16b, \s1\().16b
+
+	round		\ab, \s0, v11, v12, 0
+	round		\ab, \s0, v12, v11, 1
+	round		\ab, \s0, v11, v12, 2
+	round		\ab, \s0, v12, v11, 3
+
+	.ifnb		\s4
+	sm3partw2	\s4\().4s, v7.4s, v6.4s
+	.endif
+	.endm
+
+	/*
+	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
+	 *                       int blocks)
+	 */
+	.text
+ENTRY(sm3_ce_transform)
+	/* load state */
+	ld1		{v8.4s-v9.4s}, [x0]
+	rev64		v8.4s, v8.4s
+	rev64		v9.4s, v9.4s
+	ext		v8.16b, v8.16b, v8.16b, #8
+	ext		v9.16b, v9.16b, v9.16b, #8
+
+	adr_l		x8, .Lt
+	ldp		s13, s14, [x8]
+
+	/* load input */
+0:	ld1		{v0.16b-v3.16b}, [x1], #64
+	sub		w2, w2, #1
+
+	mov		v15.16b, v8.16b
+	mov		v16.16b, v9.16b
+
+CPU_LE(	rev32		v0.16b, v0.16b		)
+CPU_LE(	rev32		v1.16b, v1.16b		)
+CPU_LE(	rev32		v2.16b, v2.16b		)
+CPU_LE(	rev32		v3.16b, v3.16b		)
+
+	ext		v11.16b, v13.16b, v13.16b, #4
+
+	qround		a, v0, v1, v2, v3, v4
+	qround		a, v1, v2, v3, v4, v0
+	qround		a, v2, v3, v4, v0, v1
+	qround		a, v3, v4, v0, v1, v2
+
+	ext		v11.16b, v14.16b, v14.16b, #4
+
+	qround		b, v4, v0, v1, v2, v3
+	qround		b, v0, v1, v2, v3, v4
+	qround		b, v1, v2, v3, v4, v0
+	qround		b, v2, v3, v4, v0, v1
+	qround		b, v3, v4, v0, v1, v2
+	qround		b, v4, v0, v1, v2, v3
+	qround		b, v0, v1, v2, v3, v4
+	qround		b, v1, v2, v3, v4, v0
+	qround		b, v2, v3, v4, v0, v1
+	qround		b, v3, v4
+	qround		b, v4, v0
+	qround		b, v0, v1
+
+	eor		v8.16b, v8.16b, v15.16b
+	eor		v9.16b, v9.16b, v16.16b
+
+	/* handled all input blocks? */
+	cbnz		w2, 0b
+
+	/* save state */
+	rev64		v8.4s, v8.4s
+	rev64		v9.4s, v9.4s
+	ext		v8.16b, v8.16b, v8.16b, #8
+	ext		v9.16b, v9.16b, v9.16b, #8
+	st1		{v8.4s-v9.4s}, [x0]
+	ret
+ENDPROC(sm3_ce_transform)
+
+	.section	".rodata", "a"
+	.align		3
+.Lt:	.word		0x79cc4519, 0x9d8a7a87

+ 92 - 0
arch/arm64/crypto/sm3-ce-glue.c

@@ -0,0 +1,92 @@
+/*
+ * sm3-ce-glue.c - SM3 secure hash using ARMv8.2 Crypto Extensions
+ *
+ * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
+				 int blocks);
+
+static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int len)
+{
+	if (!may_use_simd())
+		return crypto_sm3_update(desc, data, len);
+
+	kernel_neon_begin();
+	sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sm3_ce_final(struct shash_desc *desc, u8 *out)
+{
+	if (!may_use_simd())
+		return crypto_sm3_finup(desc, NULL, 0, out);
+
+	kernel_neon_begin();
+	sm3_base_do_finalize(desc, sm3_ce_transform);
+	kernel_neon_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return crypto_sm3_finup(desc, data, len, out);
+
+	kernel_neon_begin();
+	sm3_base_do_update(desc, data, len, sm3_ce_transform);
+	kernel_neon_end();
+
+	return sm3_ce_final(desc, out);
+}
+
+static struct shash_alg sm3_alg = {
+	.digestsize		= SM3_DIGEST_SIZE,
+	.init			= sm3_base_init,
+	.update			= sm3_ce_update,
+	.final			= sm3_ce_final,
+	.finup			= sm3_ce_finup,
+	.descsize		= sizeof(struct sm3_state),
+	.base.cra_name		= "sm3",
+	.base.cra_driver_name	= "sm3-ce",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SM3_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+};
+
+static int __init sm3_ce_mod_init(void)
+{
+	return crypto_register_shash(&sm3_alg);
+}
+
+static void __exit sm3_ce_mod_fini(void)
+{
+	crypto_unregister_shash(&sm3_alg);
+}
+
+module_cpu_feature_match(SM3, sm3_ce_mod_init);
+module_exit(sm3_ce_mod_fini);

+ 1 - 0
arch/powerpc/crypto/crc32c-vpmsum_glue.c

@@ -141,6 +141,7 @@ static struct shash_alg alg = {
 		.cra_name		= "crc32c",
 		.cra_driver_name	= "crc32c-vpmsum",
 		.cra_priority		= 200,
+		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(u32),
 		.cra_module		= THIS_MODULE,

+ 3 - 0
arch/s390/crypto/crc32-vx.c

@@ -239,6 +239,7 @@ static struct shash_alg crc32_vx_algs[] = {
 			.cra_name	 = "crc32",
 			.cra_driver_name = "crc32-vx",
 			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize	 = CRC32_BLOCK_SIZE,
 			.cra_ctxsize	 = sizeof(struct crc_ctx),
 			.cra_module	 = THIS_MODULE,
@@ -259,6 +260,7 @@ static struct shash_alg crc32_vx_algs[] = {
 			.cra_name	 = "crc32be",
 			.cra_driver_name = "crc32be-vx",
 			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize	 = CRC32_BLOCK_SIZE,
 			.cra_ctxsize	 = sizeof(struct crc_ctx),
 			.cra_module	 = THIS_MODULE,
@@ -279,6 +281,7 @@ static struct shash_alg crc32_vx_algs[] = {
 			.cra_name	 = "crc32c",
 			.cra_driver_name = "crc32c-vx",
 			.cra_priority	 = 200,
+			.cra_flags	 = CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize	 = CRC32_BLOCK_SIZE,
 			.cra_ctxsize	 = sizeof(struct crc_ctx),
 			.cra_module	 = THIS_MODULE,

+ 1 - 0
arch/sparc/crypto/crc32c_glue.c

@@ -133,6 +133,7 @@ static struct shash_alg alg = {
 		.cra_name		=	"crc32c",
 		.cra_driver_name	=	"crc32c-sparc64",
 		.cra_priority		=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		=	sizeof(u32),
 		.cra_alignmask		=	7,

+ 57 - 142
arch/x86/crypto/aesni-intel_asm.S

@@ -90,30 +90,6 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
             .octa 0x00000000000000000000000000000000
 
-.section .rodata
-.align 16
-.type aad_shift_arr, @object
-.size aad_shift_arr, 272
-aad_shift_arr:
-        .octa     0xffffffffffffffffffffffffffffffff
-        .octa     0xffffffffffffffffffffffffffffff0C
-        .octa     0xffffffffffffffffffffffffffff0D0C
-        .octa     0xffffffffffffffffffffffffff0E0D0C
-        .octa     0xffffffffffffffffffffffff0F0E0D0C
-        .octa     0xffffffffffffffffffffff0C0B0A0908
-        .octa     0xffffffffffffffffffff0D0C0B0A0908
-        .octa     0xffffffffffffffffff0E0D0C0B0A0908
-        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
-        .octa     0xffffffffffffff0C0B0A090807060504
-        .octa     0xffffffffffff0D0C0B0A090807060504
-        .octa     0xffffffffff0E0D0C0B0A090807060504
-        .octa     0xffffffff0F0E0D0C0B0A090807060504
-        .octa     0xffffff0C0B0A09080706050403020100
-        .octa     0xffff0D0C0B0A09080706050403020100
-        .octa     0xff0E0D0C0B0A09080706050403020100
-        .octa     0x0F0E0D0C0B0A09080706050403020100
-
-
 .text
 
 
@@ -257,6 +233,37 @@ aad_shift_arr:
 	pxor      \TMP1, \GH            # result is in TMP1
 .endm
 
+# Reads DLEN bytes starting at DPTR and stores in XMMDst
+# where 0 < DLEN < 16
+# Clobbers %rax, DLEN and XMM1
+.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
+        cmp $8, \DLEN
+        jl _read_lt8_\@
+        mov (\DPTR), %rax
+        MOVQ_R64_XMM %rax, \XMMDst
+        sub $8, \DLEN
+        jz _done_read_partial_block_\@
+	xor %eax, %eax
+_read_next_byte_\@:
+        shl $8, %rax
+        mov 7(\DPTR, \DLEN, 1), %al
+        dec \DLEN
+        jnz _read_next_byte_\@
+        MOVQ_R64_XMM %rax, \XMM1
+	pslldq $8, \XMM1
+        por \XMM1, \XMMDst
+	jmp _done_read_partial_block_\@
+_read_lt8_\@:
+	xor %eax, %eax
+_read_next_byte_lt8_\@:
+        shl $8, %rax
+        mov -1(\DPTR, \DLEN, 1), %al
+        dec \DLEN
+        jnz _read_next_byte_lt8_\@
+        MOVQ_R64_XMM %rax, \XMMDst
+_done_read_partial_block_\@:
+.endm
+
 /*
 * if a = number of total plaintext bytes
 * b = floor(a/16)
@@ -273,62 +280,30 @@ aad_shift_arr:
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
         MOVADQ     SHUF_MASK(%rip), %xmm14
 	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r12           # %r12 = aadLen
-	mov	   %r12, %r11
+	mov	   arg8, %r11           # %r11 = aadLen
 	pxor	   %xmm\i, %xmm\i
 	pxor       \XMM2, \XMM2
 
 	cmp	   $16, %r11
-	jl	   _get_AAD_rest8\num_initial_blocks\operation
+	jl	   _get_AAD_rest\num_initial_blocks\operation
 _get_AAD_blocks\num_initial_blocks\operation:
 	movdqu	   (%r10), %xmm\i
 	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   %xmm\i, \XMM2
 	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 	add	   $16, %r10
-	sub	   $16, %r12
 	sub	   $16, %r11
 	cmp	   $16, %r11
 	jge	   _get_AAD_blocks\num_initial_blocks\operation
 
 	movdqu	   \XMM2, %xmm\i
+
+	/* read the last <16B of AAD */
+_get_AAD_rest\num_initial_blocks\operation:
 	cmp	   $0, %r11
 	je	   _get_AAD_done\num_initial_blocks\operation
 
-	pxor	   %xmm\i,%xmm\i
-
-	/* read the last <16B of AAD. since we have at least 4B of
-	data right after the AAD (the ICV, and maybe some CT), we can
-	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\num_initial_blocks\operation:
-	cmp	   $4, %r11
-	jle	   _get_AAD_rest4\num_initial_blocks\operation
-	movq	   (%r10), \TMP1
-	add	   $8, %r10
-	sub	   $8, %r11
-	pslldq	   $8, \TMP1
-	psrldq	   $8, %xmm\i
-	pxor	   \TMP1, %xmm\i
-	jmp	   _get_AAD_rest8\num_initial_blocks\operation
-_get_AAD_rest4\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	jle	   _get_AAD_rest0\num_initial_blocks\operation
-	mov	   (%r10), %eax
-	movq	   %rax, \TMP1
-	add	   $4, %r10
-	sub	   $4, %r10
-	pslldq	   $12, \TMP1
-	psrldq	   $4, %xmm\i
-	pxor	   \TMP1, %xmm\i
-_get_AAD_rest0\num_initial_blocks\operation:
-	/* finalize: shift out the extra bytes we read, and align
-	left. since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq	   %r12, %r11
-	salq	   $4, %r11
-	movdqu	   aad_shift_arr(%r11), \TMP1
-	PSHUFB_XMM \TMP1, %xmm\i
-_get_AAD_rest_final\num_initial_blocks\operation:
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   \XMM2, %xmm\i
 	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -532,62 +507,30 @@ _initial_blocks_done\num_initial_blocks\operation:
 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
         MOVADQ     SHUF_MASK(%rip), %xmm14
 	mov	   arg7, %r10           # %r10 = AAD
-	mov	   arg8, %r12           # %r12 = aadLen
-	mov	   %r12, %r11
+	mov	   arg8, %r11           # %r11 = aadLen
 	pxor	   %xmm\i, %xmm\i
 	pxor	   \XMM2, \XMM2
 
 	cmp	   $16, %r11
-	jl	   _get_AAD_rest8\num_initial_blocks\operation
+	jl	   _get_AAD_rest\num_initial_blocks\operation
 _get_AAD_blocks\num_initial_blocks\operation:
 	movdqu	   (%r10), %xmm\i
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   %xmm\i, \XMM2
 	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 	add	   $16, %r10
-	sub	   $16, %r12
 	sub	   $16, %r11
 	cmp	   $16, %r11
 	jge	   _get_AAD_blocks\num_initial_blocks\operation
 
 	movdqu	   \XMM2, %xmm\i
+
+	/* read the last <16B of AAD */
+_get_AAD_rest\num_initial_blocks\operation:
 	cmp	   $0, %r11
 	je	   _get_AAD_done\num_initial_blocks\operation
 
-	pxor	   %xmm\i,%xmm\i
-
-	/* read the last <16B of AAD. since we have at least 4B of
-	data right after the AAD (the ICV, and maybe some PT), we can
-	read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\num_initial_blocks\operation:
-	cmp	   $4, %r11
-	jle	   _get_AAD_rest4\num_initial_blocks\operation
-	movq	   (%r10), \TMP1
-	add	   $8, %r10
-	sub	   $8, %r11
-	pslldq	   $8, \TMP1
-	psrldq	   $8, %xmm\i
-	pxor	   \TMP1, %xmm\i
-	jmp	   _get_AAD_rest8\num_initial_blocks\operation
-_get_AAD_rest4\num_initial_blocks\operation:
-	cmp	   $0, %r11
-	jle	   _get_AAD_rest0\num_initial_blocks\operation
-	mov	   (%r10), %eax
-	movq	   %rax, \TMP1
-	add	   $4, %r10
-	sub	   $4, %r10
-	pslldq	   $12, \TMP1
-	psrldq	   $4, %xmm\i
-	pxor	   \TMP1, %xmm\i
-_get_AAD_rest0\num_initial_blocks\operation:
-	/* finalize: shift out the extra bytes we read, and align
-	left. since pslldq can only shift by an immediate, we use
-	vpshufb and an array of shuffle masks */
-	movq	   %r12, %r11
-	salq	   $4, %r11
-	movdqu	   aad_shift_arr(%r11), \TMP1
-	PSHUFB_XMM \TMP1, %xmm\i
-_get_AAD_rest_final\num_initial_blocks\operation:
+	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	pxor	   \XMM2, %xmm\i
 	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -1386,14 +1329,6 @@ _esb_loop_\@:
 *
 *                        AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*       from the definition of the spec, aadLen can only be 8 or 12 bytes.
-*       The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-*       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-*       For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 *****************************************************************************/
@@ -1487,19 +1422,16 @@ _zero_cipher_left_decrypt:
 	PSHUFB_XMM %xmm10, %xmm0
 
 	ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1    # E(K, Yn)
-	sub $16, %r11
-	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1   # receive the last <16 byte block
-	lea SHIFT_MASK+16(%rip), %r12
-	sub %r13, %r12
-# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
-# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu (%r12), %xmm2           # get the appropriate shuffle mask
-	PSHUFB_XMM %xmm2, %xmm1            # right shift 16-%r13 butes
 
+	lea (%arg3,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+	lea ALL_F+16(%rip), %r12
+	sub %r13, %r12
 	movdqa  %xmm1, %xmm2
 	pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-	movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu (%r12), %xmm1
 	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
 	pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
 	pand    %xmm1, %xmm2
@@ -1508,9 +1440,6 @@ _zero_cipher_left_decrypt:
 
 	pxor %xmm2, %xmm8
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	          # GHASH computation for the last <16 byte block
-	sub %r13, %r11
-	add $16, %r11
 
         # output %r13 bytes
 	MOVQ_R64_XMM	%xmm0, %rax
@@ -1664,14 +1593,6 @@ ENDPROC(aesni_gcm_dec)
 *
 *                         AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-*       from the definition of the spec, aadLen can only be 8 or 12 bytes.
-*       The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-*       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-*       For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
 ENTRY(aesni_gcm_enc)
@@ -1764,19 +1685,16 @@ _zero_cipher_left_encrypt:
         movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
 
-
 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
-	sub $16, %r11
-	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1     # receive the last <16 byte blocks
-	lea SHIFT_MASK+16(%rip), %r12
+
+	lea (%arg3,%r11,1), %r10
+	mov %r13, %r12
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+	lea ALL_F+16(%rip), %r12
 	sub %r13, %r12
-	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
-	# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu	(%r12), %xmm2           # get the appropriate shuffle mask
-	PSHUFB_XMM	%xmm2, %xmm1            # shift right 16-r13 byte
 	pxor	%xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu	(%r12), %xmm1
 	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
 	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
         movdqa SHUF_MASK(%rip), %xmm10
@@ -1785,9 +1703,6 @@ _zero_cipher_left_encrypt:
 	pxor	%xmm0, %xmm8
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
 	# GHASH computation for the last <16 byte block
-	sub	%r13, %r11
-	add	$16, %r11
-
 	movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
 

+ 56 - 14
arch/x86/crypto/aesni-intel_glue.c

@@ -690,8 +690,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
 	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
 }
 
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
-			   unsigned int key_len)
+static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key,
+				  unsigned int key_len)
 {
 	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
 	struct cryptd_aead *cryptd_tfm = *ctx;
@@ -716,8 +716,8 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead,
 
 /* This is the Integrity Check Value (aka the authentication tag length and can
  * be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
-				unsigned int authsize)
+static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent,
+				       unsigned int authsize)
 {
 	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
 	struct cryptd_aead *cryptd_tfm = *ctx;
@@ -824,7 +824,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
 	    req->src->offset + req->src->length <= PAGE_SIZE) &&
-	    sg_is_last(req->dst) &&
+	    sg_is_last(req->dst) && req->dst->length &&
 	    (!PageHighMem(sg_page(req->dst)) ||
 	    req->dst->offset + req->dst->length <= PAGE_SIZE)) {
 		one_entry_in_sg = 1;
@@ -929,7 +929,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 			      aes_ctx);
 }
 
-static int rfc4106_encrypt(struct aead_request *req)
+static int gcmaes_wrapper_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
@@ -945,7 +945,7 @@ static int rfc4106_encrypt(struct aead_request *req)
 	return crypto_aead_encrypt(req);
 }
 
-static int rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_wrapper_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
@@ -1117,7 +1117,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
 {
 	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
 	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 
@@ -1128,6 +1128,30 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
 			      aes_ctx);
 }
 
+static int generic_gcmaes_init(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni",
+				       CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+
+	return 0;
+}
+
+static void generic_gcmaes_exit(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+
 static struct aead_alg aesni_aead_algs[] = { {
 	.setkey			= common_rfc4106_set_key,
 	.setauthsize		= common_rfc4106_set_authsize,
@@ -1147,10 +1171,10 @@ static struct aead_alg aesni_aead_algs[] = { {
 }, {
 	.init			= rfc4106_init,
 	.exit			= rfc4106_exit,
-	.setkey			= rfc4106_set_key,
-	.setauthsize		= rfc4106_set_authsize,
-	.encrypt		= rfc4106_encrypt,
-	.decrypt		= rfc4106_decrypt,
+	.setkey			= gcmaes_wrapper_set_key,
+	.setauthsize		= gcmaes_wrapper_set_authsize,
+	.encrypt		= gcmaes_wrapper_encrypt,
+	.decrypt		= gcmaes_wrapper_decrypt,
 	.ivsize			= GCM_RFC4106_IV_SIZE,
 	.maxauthsize		= 16,
 	.base = {
@@ -1169,14 +1193,32 @@ static struct aead_alg aesni_aead_algs[] = { {
 	.decrypt		= generic_gcmaes_decrypt,
 	.ivsize			= GCM_AES_IV_SIZE,
 	.maxauthsize		= 16,
+	.base = {
+		.cra_name		= "__generic-gcm-aes-aesni",
+		.cra_driver_name	= "__driver-generic-gcm-aes-aesni",
+		.cra_priority		= 0,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init			= generic_gcmaes_init,
+	.exit			= generic_gcmaes_exit,
+	.setkey			= gcmaes_wrapper_set_key,
+	.setauthsize		= gcmaes_wrapper_set_authsize,
+	.encrypt		= gcmaes_wrapper_encrypt,
+	.decrypt		= gcmaes_wrapper_decrypt,
+	.ivsize			= GCM_AES_IV_SIZE,
+	.maxauthsize		= 16,
 	.base = {
 		.cra_name		= "gcm(aes)",
 		.cra_driver_name	= "generic-gcm-aesni",
 		.cra_priority		= 400,
 		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
-		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_ctxsize		= sizeof(struct cryptd_aead *),
 		.cra_module		= THIS_MODULE,
 	},
 } };

+ 0 - 1
arch/x86/crypto/chacha20_glue.c

@@ -107,7 +107,6 @@ static struct skcipher_alg alg = {
 	.base.cra_priority	= 300,
 	.base.cra_blocksize	= 1,
 	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_alignmask	= sizeof(u32) - 1,
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= CHACHA20_KEY_SIZE,

+ 1 - 0
arch/x86/crypto/crc32-pclmul_glue.c

@@ -162,6 +162,7 @@ static struct shash_alg alg = {
 			.cra_name		= "crc32",
 			.cra_driver_name	= "crc32-pclmul",
 			.cra_priority		= 200,
+			.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 			.cra_ctxsize		= sizeof(u32),
 			.cra_module		= THIS_MODULE,

+ 1 - 0
arch/x86/crypto/crc32c-intel_glue.c

@@ -226,6 +226,7 @@ static struct shash_alg alg = {
 		.cra_name		=	"crc32c",
 		.cra_driver_name	=	"crc32c-intel",
 		.cra_priority		=	200,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		=	sizeof(u32),
 		.cra_module		=	THIS_MODULE,

+ 0 - 2
arch/x86/crypto/poly1305_glue.c

@@ -164,14 +164,12 @@ static struct shash_alg alg = {
 	.init		= poly1305_simd_init,
 	.update		= poly1305_simd_update,
 	.final		= crypto_poly1305_final,
-	.setkey		= crypto_poly1305_setkey,
 	.descsize	= sizeof(struct poly1305_simd_desc_ctx),
 	.base		= {
 		.cra_name		= "poly1305",
 		.cra_driver_name	= "poly1305-simd",
 		.cra_priority		= 300,
 		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_alignmask		= sizeof(u32) - 1,
 		.cra_blocksize		= POLY1305_BLOCK_SIZE,
 		.cra_module		= THIS_MODULE,
 	},

+ 4 - 180
arch/x86/crypto/salsa20-i586-asm_32.S

@@ -1,6 +1,7 @@
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
+# Derived from:
+#	salsa20_pm.s version 20051229
+#	D. J. Bernstein
+#	Public domain.
 
 #include <linux/linkage.h>
 
@@ -935,180 +936,3 @@ ENTRY(salsa20_encrypt_bytes)
 	# goto bytesatleast1
 	jmp	._bytesatleast1
 ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	#   eax_stack = eax
-	movl	%eax,64(%esp)
-	#   ebx_stack = ebx
-	movl	%ebx,68(%esp)
-	#   esi_stack = esi
-	movl	%esi,72(%esp)
-	#   edi_stack = edi
-	movl	%edi,76(%esp)
-	#   ebp_stack = ebp
-	movl	%ebp,80(%esp)
-	#   k = arg2
-	movl	8(%esp,%eax),%ecx
-	#   kbits = arg3
-	movl	12(%esp,%eax),%edx
-	#   x = arg1
-	movl	4(%esp,%eax),%eax
-	#   in1 = *(uint32 *) (k + 0)
-	movl	0(%ecx),%ebx
-	#   in2 = *(uint32 *) (k + 4)
-	movl	4(%ecx),%esi
-	#   in3 = *(uint32 *) (k + 8)
-	movl	8(%ecx),%edi
-	#   in4 = *(uint32 *) (k + 12)
-	movl	12(%ecx),%ebp
-	#   *(uint32 *) (x + 4) = in1
-	movl	%ebx,4(%eax)
-	#   *(uint32 *) (x + 8) = in2
-	movl	%esi,8(%eax)
-	#   *(uint32 *) (x + 12) = in3
-	movl	%edi,12(%eax)
-	#   *(uint32 *) (x + 16) = in4
-	movl	%ebp,16(%eax)
-	#   kbits - 256
-	cmp	$256,%edx
-	#   goto kbits128 if unsigned<
-	jb	._kbits128
-._kbits256:
-	#     in11 = *(uint32 *) (k + 16)
-	movl	16(%ecx),%edx
-	#     in12 = *(uint32 *) (k + 20)
-	movl	20(%ecx),%ebx
-	#     in13 = *(uint32 *) (k + 24)
-	movl	24(%ecx),%esi
-	#     in14 = *(uint32 *) (k + 28)
-	movl	28(%ecx),%ecx
-	#     *(uint32 *) (x + 44) = in11
-	movl	%edx,44(%eax)
-	#     *(uint32 *) (x + 48) = in12
-	movl	%ebx,48(%eax)
-	#     *(uint32 *) (x + 52) = in13
-	movl	%esi,52(%eax)
-	#     *(uint32 *) (x + 56) = in14
-	movl	%ecx,56(%eax)
-	#     in0 = 1634760805
-	mov	$1634760805,%ecx
-	#     in5 = 857760878
-	mov	$857760878,%edx
-	#     in10 = 2036477234
-	mov	$2036477234,%ebx
-	#     in15 = 1797285236
-	mov	$1797285236,%esi
-	#     *(uint32 *) (x + 0) = in0
-	movl	%ecx,0(%eax)
-	#     *(uint32 *) (x + 20) = in5
-	movl	%edx,20(%eax)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ebx,40(%eax)
-	#     *(uint32 *) (x + 60) = in15
-	movl	%esi,60(%eax)
-	#   goto keysetupdone
-	jmp	._keysetupdone
-._kbits128:
-	#     in11 = *(uint32 *) (k + 0)
-	movl	0(%ecx),%edx
-	#     in12 = *(uint32 *) (k + 4)
-	movl	4(%ecx),%ebx
-	#     in13 = *(uint32 *) (k + 8)
-	movl	8(%ecx),%esi
-	#     in14 = *(uint32 *) (k + 12)
-	movl	12(%ecx),%ecx
-	#     *(uint32 *) (x + 44) = in11
-	movl	%edx,44(%eax)
-	#     *(uint32 *) (x + 48) = in12
-	movl	%ebx,48(%eax)
-	#     *(uint32 *) (x + 52) = in13
-	movl	%esi,52(%eax)
-	#     *(uint32 *) (x + 56) = in14
-	movl	%ecx,56(%eax)
-	#     in0 = 1634760805
-	mov	$1634760805,%ecx
-	#     in5 = 824206446
-	mov	$824206446,%edx
-	#     in10 = 2036477238
-	mov	$2036477238,%ebx
-	#     in15 = 1797285236
-	mov	$1797285236,%esi
-	#     *(uint32 *) (x + 0) = in0
-	movl	%ecx,0(%eax)
-	#     *(uint32 *) (x + 20) = in5
-	movl	%edx,20(%eax)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ebx,40(%eax)
-	#     *(uint32 *) (x + 60) = in15
-	movl	%esi,60(%eax)
-._keysetupdone:
-	#   eax = eax_stack
-	movl	64(%esp),%eax
-	#   ebx = ebx_stack
-	movl	68(%esp),%ebx
-	#   esi = esi_stack
-	movl	72(%esp),%esi
-	#   edi = edi_stack
-	movl	76(%esp),%edi
-	#   ebp = ebp_stack
-	movl	80(%esp),%ebp
-	# leave
-	add	%eax,%esp
-	ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	#   eax_stack = eax
-	movl	%eax,64(%esp)
-	#   ebx_stack = ebx
-	movl	%ebx,68(%esp)
-	#   esi_stack = esi
-	movl	%esi,72(%esp)
-	#   edi_stack = edi
-	movl	%edi,76(%esp)
-	#   ebp_stack = ebp
-	movl	%ebp,80(%esp)
-	#   iv = arg2
-	movl	8(%esp,%eax),%ecx
-	#   x = arg1
-	movl	4(%esp,%eax),%eax
-	#   in6 = *(uint32 *) (iv + 0)
-	movl	0(%ecx),%edx
-	#   in7 = *(uint32 *) (iv + 4)
-	movl	4(%ecx),%ecx
-	#   in8 = 0
-	mov	$0,%ebx
-	#   in9 = 0
-	mov	$0,%esi
-	#   *(uint32 *) (x + 24) = in6
-	movl	%edx,24(%eax)
-	#   *(uint32 *) (x + 28) = in7
-	movl	%ecx,28(%eax)
-	#   *(uint32 *) (x + 32) = in8
-	movl	%ebx,32(%eax)
-	#   *(uint32 *) (x + 36) = in9
-	movl	%esi,36(%eax)
-	#   eax = eax_stack
-	movl	64(%esp),%eax
-	#   ebx = ebx_stack
-	movl	68(%esp),%ebx
-	#   esi = esi_stack
-	movl	72(%esp),%esi
-	#   edi = edi_stack
-	movl	76(%esp),%edi
-	#   ebp = ebp_stack
-	movl	80(%esp),%ebp
-	# leave
-	add	%eax,%esp
-	ret
-ENDPROC(salsa20_ivsetup)

+ 0 - 114
arch/x86/crypto/salsa20-x86_64-asm_64.S

@@ -803,117 +803,3 @@ ENTRY(salsa20_encrypt_bytes)
 	# goto bytesatleast1
 	jmp	._bytesatleast1
 ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	#   k = arg2
-	mov	%rsi,%rsi
-	#   kbits = arg3
-	mov	%rdx,%rdx
-	#   x = arg1
-	mov	%rdi,%rdi
-	#   in0 = *(uint64 *) (k + 0)
-	movq	0(%rsi),%r8
-	#   in2 = *(uint64 *) (k + 8)
-	movq	8(%rsi),%r9
-	#   *(uint64 *) (x + 4) = in0
-	movq	%r8,4(%rdi)
-	#   *(uint64 *) (x + 12) = in2
-	movq	%r9,12(%rdi)
-	#                    unsigned<? kbits - 256
-	cmp	$256,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto kbits128 if unsigned<
-	jb	._kbits128
-#   kbits256:
-._kbits256:
-	#     in10 = *(uint64 *) (k + 16)
-	movq	16(%rsi),%rdx
-	#     in12 = *(uint64 *) (k + 24)
-	movq	24(%rsi),%rsi
-	#     *(uint64 *) (x + 44) = in10
-	movq	%rdx,44(%rdi)
-	#     *(uint64 *) (x + 52) = in12
-	movq	%rsi,52(%rdi)
-	#     in0 = 1634760805
-	mov	$1634760805,%rsi
-	#     in4 = 857760878
-	mov	$857760878,%rdx
-	#     in10 = 2036477234
-	mov	$2036477234,%rcx
-	#     in14 = 1797285236
-	mov	$1797285236,%r8
-	#     *(uint32 *) (x + 0) = in0
-	movl	%esi,0(%rdi)
-	#     *(uint32 *) (x + 20) = in4
-	movl	%edx,20(%rdi)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ecx,40(%rdi)
-	#     *(uint32 *) (x + 60) = in14
-	movl	%r8d,60(%rdi)
-	# comment:fp stack unchanged by jump
-	#   goto keysetupdone
-	jmp	._keysetupdone
-#   kbits128:
-._kbits128:
-	#     in10 = *(uint64 *) (k + 0)
-	movq	0(%rsi),%rdx
-	#     in12 = *(uint64 *) (k + 8)
-	movq	8(%rsi),%rsi
-	#     *(uint64 *) (x + 44) = in10
-	movq	%rdx,44(%rdi)
-	#     *(uint64 *) (x + 52) = in12
-	movq	%rsi,52(%rdi)
-	#     in0 = 1634760805
-	mov	$1634760805,%rsi
-	#     in4 = 824206446
-	mov	$824206446,%rdx
-	#     in10 = 2036477238
-	mov	$2036477238,%rcx
-	#     in14 = 1797285236
-	mov	$1797285236,%r8
-	#     *(uint32 *) (x + 0) = in0
-	movl	%esi,0(%rdi)
-	#     *(uint32 *) (x + 20) = in4
-	movl	%edx,20(%rdi)
-	#     *(uint32 *) (x + 40) = in10
-	movl	%ecx,40(%rdi)
-	#     *(uint32 *) (x + 60) = in14
-	movl	%r8d,60(%rdi)
-#   keysetupdone:
-._keysetupdone:
-	# leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	#   iv = arg2
-	mov	%rsi,%rsi
-	#   x = arg1
-	mov	%rdi,%rdi
-	#   in6 = *(uint64 *) (iv + 0)
-	movq	0(%rsi),%rsi
-	#   in8 = 0
-	mov	$0,%r8
-	#   *(uint64 *) (x + 24) = in6
-	movq	%rsi,24(%rdi)
-	#   *(uint64 *) (x + 32) = in8
-	movq	%r8,32(%rdi)
-	# leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-ENDPROC(salsa20_ivsetup)

+ 40 - 65
arch/x86/crypto/salsa20_glue.c

@@ -11,6 +11,9 @@
  * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
  *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
  *
+ * Also modified to set up the initial state using the generic C code rather
+ * than in assembly.
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
@@ -18,93 +21,65 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/salsa20.h>
 #include <linux/module.h>
-#include <linux/crypto.h>
-
-#define SALSA20_IV_SIZE        8U
-#define SALSA20_MIN_KEY_SIZE  16U
-#define SALSA20_MAX_KEY_SIZE  32U
-
-struct salsa20_ctx
-{
-	u32 input[16];
-};
 
-asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
-				 u32 keysize, u32 ivsize);
-asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
-asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
-				      const u8 *src, u8 *dst, u32 bytes);
+asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
+				      u32 bytes);
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
-		  unsigned int keysize)
+static int salsa20_asm_crypt(struct skcipher_request *req)
 {
-	struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
-	salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
-	return 0;
-}
-
-static int encrypt(struct blkcipher_desc *desc,
-		   struct scatterlist *dst, struct scatterlist *src,
-		   unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	u32 state[16];
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 64);
+	err = skcipher_walk_virt(&walk, req, true);
 
-	salsa20_ivsetup(ctx, walk.iv);
+	crypto_salsa20_init(state, ctx, walk.iv);
 
-	while (walk.nbytes >= 64) {
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr,
-				      walk.nbytes - (walk.nbytes % 64));
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
-	}
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
 
-	if (walk.nbytes) {
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		salsa20_encrypt_bytes(state, walk.src.virt.addr,
+				      walk.dst.virt.addr, nbytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static struct crypto_alg alg = {
-	.cra_name           =   "salsa20",
-	.cra_driver_name    =   "salsa20-asm",
-	.cra_priority       =   200,
-	.cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_type           =   &crypto_blkcipher_type,
-	.cra_blocksize      =   1,
-	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
-	.cra_alignmask      =	3,
-	.cra_module         =   THIS_MODULE,
-	.cra_u              =   {
-		.blkcipher = {
-			.setkey         =   setkey,
-			.encrypt        =   encrypt,
-			.decrypt        =   encrypt,
-			.min_keysize    =   SALSA20_MIN_KEY_SIZE,
-			.max_keysize    =   SALSA20_MAX_KEY_SIZE,
-			.ivsize         =   SALSA20_IV_SIZE,
-		}
-	}
+static struct skcipher_alg alg = {
+	.base.cra_name		= "salsa20",
+	.base.cra_driver_name	= "salsa20-asm",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct salsa20_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= SALSA20_MIN_KEY_SIZE,
+	.max_keysize		= SALSA20_MAX_KEY_SIZE,
+	.ivsize			= SALSA20_IV_SIZE,
+	.chunksize		= SALSA20_BLOCK_SIZE,
+	.setkey			= crypto_salsa20_setkey,
+	.encrypt		= salsa20_asm_crypt,
+	.decrypt		= salsa20_asm_crypt,
 };
 
 static int __init init(void)
 {
-	return crypto_register_alg(&alg);
+	return crypto_register_skcipher(&alg);
 }
 
 static void __exit fini(void)
 {
-	crypto_unregister_alg(&alg);
+	crypto_unregister_skcipher(&alg);
 }
 
 module_init(init);

+ 60 - 52
arch/x86/crypto/twofish-x86_64-asm_64-3way.S

@@ -55,29 +55,31 @@
 #define RAB1bl %bl
 #define RAB2bl %cl
 
+#define CD0 0x0(%rsp)
+#define CD1 0x8(%rsp)
+#define CD2 0x10(%rsp)
+
+# used only before/after all rounds
 #define RCD0 %r8
 #define RCD1 %r9
 #define RCD2 %r10
 
-#define RCD0d %r8d
-#define RCD1d %r9d
-#define RCD2d %r10d
-
-#define RX0 %rbp
-#define RX1 %r11
-#define RX2 %r12
+# used only during rounds
+#define RX0 %r8
+#define RX1 %r9
+#define RX2 %r10
 
-#define RX0d %ebp
-#define RX1d %r11d
-#define RX2d %r12d
+#define RX0d %r8d
+#define RX1d %r9d
+#define RX2d %r10d
 
-#define RY0 %r13
-#define RY1 %r14
-#define RY2 %r15
+#define RY0 %r11
+#define RY1 %r12
+#define RY2 %r13
 
-#define RY0d %r13d
-#define RY1d %r14d
-#define RY2d %r15d
+#define RY0d %r11d
+#define RY1d %r12d
+#define RY2d %r13d
 
 #define RT0 %rdx
 #define RT1 %rsi
@@ -85,6 +87,8 @@
 #define RT0d %edx
 #define RT1d %esi
 
+#define RT1bl %sil
+
 #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
 	movzbl ab ## bl,		tmp2 ## d; \
 	movzbl ab ## bh,		tmp1 ## d; \
@@ -92,6 +96,11 @@
 	op1##l T0(CTX, tmp2, 4),	dst ## d; \
 	op2##l T1(CTX, tmp1, 4),	dst ## d;
 
+#define swap_ab_with_cd(ab, cd, tmp)	\
+	movq cd, tmp;			\
+	movq ab, cd;			\
+	movq tmp, ab;
+
 /*
  * Combined G1 & G2 function. Reordered with help of rotates to have moves
  * at begining.
@@ -110,15 +119,15 @@
 	/* G1,2 && G2,2 */ \
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
-	xchgq cd ## 0, ab ## 0; \
+	swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
 	\
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
-	xchgq cd ## 1, ab ## 1; \
+	swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
 	\
 	do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
 	do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
-	xchgq cd ## 2, ab ## 2;
+	swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
 
 #define enc_round_end(ab, x, y, n) \
 	addl y ## d,			x ## d; \
@@ -168,6 +177,16 @@
 	decrypt_round3(ba, dc, (n*2)+1); \
 	decrypt_round3(ba, dc, (n*2));
 
+#define push_cd()	\
+	pushq RCD2;	\
+	pushq RCD1;	\
+	pushq RCD0;
+
+#define pop_cd()	\
+	popq RCD0;	\
+	popq RCD1;	\
+	popq RCD2;
+
 #define inpack3(in, n, xy, m) \
 	movq 4*(n)(in),			xy ## 0; \
 	xorq w+4*m(CTX),		xy ## 0; \
@@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way)
 	 *	%rdx: src, RIO
 	 *	%rcx: bool, if true: xor output
 	 */
-	pushq %r15;
-	pushq %r14;
 	pushq %r13;
 	pushq %r12;
-	pushq %rbp;
 	pushq %rbx;
 
 	pushq %rcx; /* bool xor */
@@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way)
 
 	inpack_enc3();
 
-	encrypt_cycle3(RAB, RCD, 0);
-	encrypt_cycle3(RAB, RCD, 1);
-	encrypt_cycle3(RAB, RCD, 2);
-	encrypt_cycle3(RAB, RCD, 3);
-	encrypt_cycle3(RAB, RCD, 4);
-	encrypt_cycle3(RAB, RCD, 5);
-	encrypt_cycle3(RAB, RCD, 6);
-	encrypt_cycle3(RAB, RCD, 7);
+	push_cd();
+	encrypt_cycle3(RAB, CD, 0);
+	encrypt_cycle3(RAB, CD, 1);
+	encrypt_cycle3(RAB, CD, 2);
+	encrypt_cycle3(RAB, CD, 3);
+	encrypt_cycle3(RAB, CD, 4);
+	encrypt_cycle3(RAB, CD, 5);
+	encrypt_cycle3(RAB, CD, 6);
+	encrypt_cycle3(RAB, CD, 7);
+	pop_cd();
 
 	popq RIO; /* dst */
-	popq %rbp; /* bool xor */
+	popq RT1; /* bool xor */
 
-	testb %bpl, %bpl;
+	testb RT1bl, RT1bl;
 	jnz .L__enc_xor3;
 
 	outunpack_enc3(mov);
 
 	popq %rbx;
-	popq %rbp;
 	popq %r12;
 	popq %r13;
-	popq %r14;
-	popq %r15;
 	ret;
 
 .L__enc_xor3:
 	outunpack_enc3(xor);
 
 	popq %rbx;
-	popq %rbp;
 	popq %r12;
 	popq %r13;
-	popq %r14;
-	popq %r15;
 	ret;
 ENDPROC(__twofish_enc_blk_3way)
 
@@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way)
 	 *	%rsi: dst
 	 *	%rdx: src, RIO
 	 */
-	pushq %r15;
-	pushq %r14;
 	pushq %r13;
 	pushq %r12;
-	pushq %rbp;
 	pushq %rbx;
 
 	pushq %rsi; /* dst */
 
 	inpack_dec3();
 
-	decrypt_cycle3(RAB, RCD, 7);
-	decrypt_cycle3(RAB, RCD, 6);
-	decrypt_cycle3(RAB, RCD, 5);
-	decrypt_cycle3(RAB, RCD, 4);
-	decrypt_cycle3(RAB, RCD, 3);
-	decrypt_cycle3(RAB, RCD, 2);
-	decrypt_cycle3(RAB, RCD, 1);
-	decrypt_cycle3(RAB, RCD, 0);
+	push_cd();
+	decrypt_cycle3(RAB, CD, 7);
+	decrypt_cycle3(RAB, CD, 6);
+	decrypt_cycle3(RAB, CD, 5);
+	decrypt_cycle3(RAB, CD, 4);
+	decrypt_cycle3(RAB, CD, 3);
+	decrypt_cycle3(RAB, CD, 2);
+	decrypt_cycle3(RAB, CD, 1);
+	decrypt_cycle3(RAB, CD, 0);
+	pop_cd();
 
 	popq RIO; /* dst */
 
 	outunpack_dec3();
 
 	popq %rbx;
-	popq %rbp;
 	popq %r12;
 	popq %r13;
-	popq %r14;
-	popq %r15;
 	ret;
 ENDPROC(twofish_dec_blk_3way)

+ 3 - 1
crypto/Kconfig

@@ -131,7 +131,7 @@ config CRYPTO_DH
 
 config CRYPTO_ECDH
 	tristate "ECDH algorithm"
-	select CRYTPO_KPP
+	select CRYPTO_KPP
 	select CRYPTO_RNG_DEFAULT
 	help
 	  Generic implementation of the ECDH algorithm
@@ -1340,6 +1340,7 @@ config CRYPTO_SALSA20_586
 	tristate "Salsa20 stream cipher algorithm (i586)"
 	depends on (X86 || UML_X86) && !64BIT
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_SALSA20
 	help
 	  Salsa20 stream cipher algorithm.
 
@@ -1353,6 +1354,7 @@ config CRYPTO_SALSA20_X86_64
 	tristate "Salsa20 stream cipher algorithm (x86_64)"
 	depends on (X86 || UML_X86) && 64BIT
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_SALSA20
 	help
 	  Salsa20 stream cipher algorithm.
 

+ 1 - 0
crypto/Makefile

@@ -99,6 +99,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
+CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o

+ 1 - 4
crypto/ablk_helper.c

@@ -18,9 +18,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  */
 
@@ -28,7 +26,6 @@
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <crypto/algapi.h>
 #include <crypto/cryptd.h>
 #include <crypto/ablk_helper.h>

+ 14 - 5
crypto/aead.c

@@ -54,11 +54,18 @@ int crypto_aead_setkey(struct crypto_aead *tfm,
 		       const u8 *key, unsigned int keylen)
 {
 	unsigned long alignmask = crypto_aead_alignmask(tfm);
+	int err;
 
 	if ((unsigned long)key & alignmask)
-		return setkey_unaligned(tfm, key, keylen);
+		err = setkey_unaligned(tfm, key, keylen);
+	else
+		err = crypto_aead_alg(tfm)->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return crypto_aead_alg(tfm)->setkey(tfm, key, keylen);
+	crypto_aead_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_aead_setkey);
 
@@ -93,6 +100,8 @@ static int crypto_aead_init_tfm(struct crypto_tfm *tfm)
 	struct crypto_aead *aead = __crypto_aead_cast(tfm);
 	struct aead_alg *alg = crypto_aead_alg(aead);
 
+	crypto_aead_set_flags(aead, CRYPTO_TFM_NEED_KEY);
+
 	aead->authsize = alg->maxauthsize;
 
 	if (alg->exit)
@@ -295,7 +304,7 @@ int aead_init_geniv(struct crypto_aead *aead)
 	if (err)
 		goto out;
 
-	ctx->sknull = crypto_get_default_null_skcipher2();
+	ctx->sknull = crypto_get_default_null_skcipher();
 	err = PTR_ERR(ctx->sknull);
 	if (IS_ERR(ctx->sknull))
 		goto out;
@@ -315,7 +324,7 @@ out:
 	return err;
 
 drop_null:
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 	goto out;
 }
 EXPORT_SYMBOL_GPL(aead_init_geniv);
@@ -325,7 +334,7 @@ void aead_exit_geniv(struct crypto_aead *tfm)
 	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 EXPORT_SYMBOL_GPL(aead_exit_geniv);
 

+ 6 - 4
crypto/af_alg.c

@@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(af_alg_release_parent);
 
 static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
-	const u32 forbidden = CRYPTO_ALG_INTERNAL;
+	const u32 allowed = CRYPTO_ALG_KERN_DRIVER_ONLY;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct sockaddr_alg *sa = (void *)uaddr;
@@ -158,6 +158,10 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	void *private;
 	int err;
 
+	/* If caller uses non-allowed flag, return error. */
+	if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
+		return -EINVAL;
+
 	if (sock->state == SS_CONNECTED)
 		return -EINVAL;
 
@@ -176,9 +180,7 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (IS_ERR(type))
 		return PTR_ERR(type);
 
-	private = type->bind(sa->salg_name,
-			     sa->salg_feat & ~forbidden,
-			     sa->salg_mask & ~forbidden);
+	private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
 	if (IS_ERR(private)) {
 		module_put(type->owner);
 		return PTR_ERR(private);

+ 28 - 5
crypto/ahash.c

@@ -193,11 +193,18 @@ int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 			unsigned int keylen)
 {
 	unsigned long alignmask = crypto_ahash_alignmask(tfm);
+	int err;
 
 	if ((unsigned long)key & alignmask)
-		return ahash_setkey_unaligned(tfm, key, keylen);
+		err = ahash_setkey_unaligned(tfm, key, keylen);
+	else
+		err = tfm->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return tfm->setkey(tfm, key, keylen);
+	crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_setkey);
 
@@ -368,7 +375,12 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup);
 
 int crypto_ahash_digest(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->digest);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
+	return crypto_ahash_op(req, tfm->digest);
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
 
@@ -450,7 +462,6 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 	struct ahash_alg *alg = crypto_ahash_alg(hash);
 
 	hash->setkey = ahash_nosetkey;
-	hash->has_setkey = false;
 	hash->export = ahash_no_export;
 	hash->import = ahash_no_import;
 
@@ -465,7 +476,8 @@ static int crypto_ahash_init_tfm(struct crypto_tfm *tfm)
 
 	if (alg->setkey) {
 		hash->setkey = alg->setkey;
-		hash->has_setkey = true;
+		if (!(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
+			crypto_ahash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 	}
 	if (alg->export)
 		hash->export = alg->export;
@@ -649,5 +661,16 @@ struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(ahash_attr_alg);
 
+bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
+{
+	struct crypto_alg *alg = &halg->base;
+
+	if (alg->cra_type != &crypto_ahash_type)
+		return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg));
+
+	return __crypto_ahash_alg(alg)->setkey != NULL;
+}
+EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Asynchronous cryptographic hash type");

+ 4 - 9
crypto/algapi.c

@@ -62,7 +62,7 @@ static int crypto_check_alg(struct crypto_alg *alg)
 	if (alg->cra_priority < 0)
 		return -EINVAL;
 
-	atomic_set(&alg->cra_refcnt, 1);
+	refcount_set(&alg->cra_refcnt, 1);
 
 	return crypto_set_driver_name(alg);
 }
@@ -123,7 +123,6 @@ static void crypto_remove_instance(struct crypto_instance *inst,
 	if (!tmpl || !crypto_tmpl_get(tmpl))
 		return;
 
-	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, &inst->alg);
 	list_move(&inst->alg.cra_list, list);
 	hlist_del(&inst->list);
 	inst->alg.cra_destroy = crypto_destroy_instance;
@@ -236,7 +235,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 	if (!larval->adult)
 		goto free_larval;
 
-	atomic_set(&larval->alg.cra_refcnt, 1);
+	refcount_set(&larval->alg.cra_refcnt, 1);
 	memcpy(larval->alg.cra_driver_name, alg->cra_driver_name,
 	       CRYPTO_MAX_ALG_NAME);
 	larval->alg.cra_priority = alg->cra_priority;
@@ -392,7 +391,6 @@ static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
 
 	alg->cra_flags |= CRYPTO_ALG_DEAD;
 
-	crypto_notify(CRYPTO_MSG_ALG_UNREGISTER, alg);
 	list_del_init(&alg->cra_list);
 	crypto_remove_spawns(alg, list, NULL);
 
@@ -411,7 +409,7 @@ int crypto_unregister_alg(struct crypto_alg *alg)
 	if (ret)
 		return ret;
 
-	BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
+	BUG_ON(refcount_read(&alg->cra_refcnt) != 1);
 	if (alg->cra_destroy)
 		alg->cra_destroy(alg);
 
@@ -470,7 +468,6 @@ int crypto_register_template(struct crypto_template *tmpl)
 	}
 
 	list_add(&tmpl->list, &crypto_template_list);
-	crypto_notify(CRYPTO_MSG_TMPL_REGISTER, tmpl);
 	err = 0;
 out:
 	up_write(&crypto_alg_sem);
@@ -497,12 +494,10 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 		BUG_ON(err);
 	}
 
-	crypto_notify(CRYPTO_MSG_TMPL_UNREGISTER, tmpl);
-
 	up_write(&crypto_alg_sem);
 
 	hlist_for_each_entry_safe(inst, n, list, list) {
-		BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
+		BUG_ON(refcount_read(&inst->alg.cra_refcnt) != 1);
 		crypto_free_instance(inst);
 	}
 	crypto_remove_final(&users);

+ 5 - 10
crypto/algif_aead.c

@@ -42,7 +42,6 @@
 
 struct aead_tfm {
 	struct crypto_aead *aead;
-	bool has_key;
 	struct crypto_skcipher *null_tfm;
 };
 
@@ -398,7 +397,7 @@ static int aead_check_key(struct socket *sock)
 
 	err = -ENOKEY;
 	lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
-	if (!tfm->has_key)
+	if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY)
 		goto unlock;
 
 	if (!pask->refcnt++)
@@ -491,7 +490,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 		return ERR_CAST(aead);
 	}
 
-	null_tfm = crypto_get_default_null_skcipher2();
+	null_tfm = crypto_get_default_null_skcipher();
 	if (IS_ERR(null_tfm)) {
 		crypto_free_aead(aead);
 		kfree(tfm);
@@ -509,7 +508,7 @@ static void aead_release(void *private)
 	struct aead_tfm *tfm = private;
 
 	crypto_free_aead(tfm->aead);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 	kfree(tfm);
 }
 
@@ -523,12 +522,8 @@ static int aead_setauthsize(void *private, unsigned int authsize)
 static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
 {
 	struct aead_tfm *tfm = private;
-	int err;
-
-	err = crypto_aead_setkey(tfm->aead, key, keylen);
-	tfm->has_key = !err;
 
-	return err;
+	return crypto_aead_setkey(tfm->aead, key, keylen);
 }
 
 static void aead_sock_destruct(struct sock *sk)
@@ -589,7 +584,7 @@ static int aead_accept_parent(void *private, struct sock *sk)
 {
 	struct aead_tfm *tfm = private;
 
-	if (!tfm->has_key)
+	if (crypto_aead_get_flags(tfm->aead) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
 
 	return aead_accept_parent_nokey(private, sk);

+ 11 - 41
crypto/algif_hash.c

@@ -34,11 +34,6 @@ struct hash_ctx {
 	struct ahash_request req;
 };
 
-struct algif_hash_tfm {
-	struct crypto_ahash *hash;
-	bool has_key;
-};
-
 static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx)
 {
 	unsigned ds;
@@ -307,7 +302,7 @@ static int hash_check_key(struct socket *sock)
 	int err = 0;
 	struct sock *psk;
 	struct alg_sock *pask;
-	struct algif_hash_tfm *tfm;
+	struct crypto_ahash *tfm;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 
@@ -321,7 +316,7 @@ static int hash_check_key(struct socket *sock)
 
 	err = -ENOKEY;
 	lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
-	if (!tfm->has_key)
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		goto unlock;
 
 	if (!pask->refcnt++)
@@ -412,41 +407,17 @@ static struct proto_ops algif_hash_ops_nokey = {
 
 static void *hash_bind(const char *name, u32 type, u32 mask)
 {
-	struct algif_hash_tfm *tfm;
-	struct crypto_ahash *hash;
-
-	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
-	if (!tfm)
-		return ERR_PTR(-ENOMEM);
-
-	hash = crypto_alloc_ahash(name, type, mask);
-	if (IS_ERR(hash)) {
-		kfree(tfm);
-		return ERR_CAST(hash);
-	}
-
-	tfm->hash = hash;
-
-	return tfm;
+	return crypto_alloc_ahash(name, type, mask);
 }
 
 static void hash_release(void *private)
 {
-	struct algif_hash_tfm *tfm = private;
-
-	crypto_free_ahash(tfm->hash);
-	kfree(tfm);
+	crypto_free_ahash(private);
 }
 
 static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-	struct algif_hash_tfm *tfm = private;
-	int err;
-
-	err = crypto_ahash_setkey(tfm->hash, key, keylen);
-	tfm->has_key = !err;
-
-	return err;
+	return crypto_ahash_setkey(private, key, keylen);
 }
 
 static void hash_sock_destruct(struct sock *sk)
@@ -461,11 +432,10 @@ static void hash_sock_destruct(struct sock *sk)
 
 static int hash_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct hash_ctx *ctx;
+	struct crypto_ahash *tfm = private;
 	struct alg_sock *ask = alg_sk(sk);
-	struct algif_hash_tfm *tfm = private;
-	struct crypto_ahash *hash = tfm->hash;
-	unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
+	struct hash_ctx *ctx;
+	unsigned int len = sizeof(*ctx) + crypto_ahash_reqsize(tfm);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
@@ -478,7 +448,7 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
 
 	ask->private = ctx;
 
-	ahash_request_set_tfm(&ctx->req, hash);
+	ahash_request_set_tfm(&ctx->req, tfm);
 	ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				   crypto_req_done, &ctx->wait);
 
@@ -489,9 +459,9 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
 
 static int hash_accept_parent(void *private, struct sock *sk)
 {
-	struct algif_hash_tfm *tfm = private;
+	struct crypto_ahash *tfm = private;
 
-	if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash))
+	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
 
 	return hash_accept_parent_nokey(private, sk);

+ 13 - 46
crypto/algif_skcipher.c

@@ -38,11 +38,6 @@
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct skcipher_tfm {
-	struct crypto_skcipher *skcipher;
-	bool has_key;
-};
-
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t size)
 {
@@ -50,8 +45,7 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
+	struct crypto_skcipher *tfm = pask->private;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
 
 	return af_alg_sendmsg(sock, msg, size, ivsize);
@@ -65,8 +59,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
 	struct af_alg_ctx *ctx = ask->private;
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
+	struct crypto_skcipher *tfm = pask->private;
 	unsigned int bs = crypto_skcipher_blocksize(tfm);
 	struct af_alg_async_req *areq;
 	int err = 0;
@@ -220,7 +213,7 @@ static int skcipher_check_key(struct socket *sock)
 	int err = 0;
 	struct sock *psk;
 	struct alg_sock *pask;
-	struct skcipher_tfm *tfm;
+	struct crypto_skcipher *tfm;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 
@@ -234,7 +227,7 @@ static int skcipher_check_key(struct socket *sock)
 
 	err = -ENOKEY;
 	lock_sock_nested(psk, SINGLE_DEPTH_NESTING);
-	if (!tfm->has_key)
+	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		goto unlock;
 
 	if (!pask->refcnt++)
@@ -313,41 +306,17 @@ static struct proto_ops algif_skcipher_ops_nokey = {
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 {
-	struct skcipher_tfm *tfm;
-	struct crypto_skcipher *skcipher;
-
-	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
-	if (!tfm)
-		return ERR_PTR(-ENOMEM);
-
-	skcipher = crypto_alloc_skcipher(name, type, mask);
-	if (IS_ERR(skcipher)) {
-		kfree(tfm);
-		return ERR_CAST(skcipher);
-	}
-
-	tfm->skcipher = skcipher;
-
-	return tfm;
+	return crypto_alloc_skcipher(name, type, mask);
 }
 
 static void skcipher_release(void *private)
 {
-	struct skcipher_tfm *tfm = private;
-
-	crypto_free_skcipher(tfm->skcipher);
-	kfree(tfm);
+	crypto_free_skcipher(private);
 }
 
 static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 {
-	struct skcipher_tfm *tfm = private;
-	int err;
-
-	err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
-	tfm->has_key = !err;
-
-	return err;
+	return crypto_skcipher_setkey(private, key, keylen);
 }
 
 static void skcipher_sock_destruct(struct sock *sk)
@@ -356,8 +325,7 @@ static void skcipher_sock_destruct(struct sock *sk)
 	struct af_alg_ctx *ctx = ask->private;
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
+	struct crypto_skcipher *tfm = pask->private;
 
 	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
@@ -369,22 +337,21 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
 	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_tfm *tfm = private;
-	struct crypto_skcipher *skcipher = tfm->skcipher;
+	struct crypto_skcipher *tfm = private;
 	unsigned int len = sizeof(*ctx);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
+	ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(tfm),
 			       GFP_KERNEL);
 	if (!ctx->iv) {
 		sock_kfree_s(sk, ctx, len);
 		return -ENOMEM;
 	}
 
-	memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
+	memset(ctx->iv, 0, crypto_skcipher_ivsize(tfm));
 
 	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
@@ -404,9 +371,9 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 
 static int skcipher_accept_parent(void *private, struct sock *sk)
 {
-	struct skcipher_tfm *tfm = private;
+	struct crypto_skcipher *tfm = private;
 
-	if (!tfm->has_key && crypto_skcipher_has_setkey(tfm->skcipher))
+	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
 		return -ENOKEY;
 
 	return skcipher_accept_parent_nokey(private, sk);

+ 3 - 3
crypto/api.c

@@ -137,7 +137,7 @@ static struct crypto_alg *crypto_larval_add(const char *name, u32 type,
 	if (IS_ERR(larval))
 		return ERR_CAST(larval);
 
-	atomic_set(&larval->alg.cra_refcnt, 2);
+	refcount_set(&larval->alg.cra_refcnt, 2);
 
 	down_write(&crypto_alg_sem);
 	alg = __crypto_alg_lookup(name, type, mask);
@@ -205,7 +205,8 @@ struct crypto_alg *crypto_alg_lookup(const char *name, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alg_lookup);
 
-struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
+static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
+					       u32 mask)
 {
 	struct crypto_alg *alg;
 
@@ -231,7 +232,6 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 
 	return crypto_larval_add(name, type, mask);
 }
-EXPORT_SYMBOL_GPL(crypto_larval_lookup);
 
 int crypto_probing_notify(unsigned long val, void *v)
 {

+ 2 - 2
crypto/authenc.c

@@ -329,7 +329,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(enc))
 		goto err_free_ahash;
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_skcipher;
@@ -363,7 +363,7 @@ static void crypto_authenc_exit_tfm(struct crypto_aead *tfm)
 
 	crypto_free_ahash(ctx->auth);
 	crypto_free_skcipher(ctx->enc);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_authenc_free(struct aead_instance *inst)

+ 2 - 2
crypto/authencesn.c

@@ -352,7 +352,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(enc))
 		goto err_free_ahash;
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_skcipher;
@@ -389,7 +389,7 @@ static void crypto_authenc_esn_exit_tfm(struct crypto_aead *tfm)
 
 	crypto_free_ahash(ctx->auth);
 	crypto_free_skcipher(ctx->enc);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_authenc_esn_free(struct aead_instance *inst)

+ 0 - 1
crypto/blkcipher.c

@@ -18,7 +18,6 @@
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/errno.h>
-#include <linux/hardirq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>

+ 1 - 2
crypto/camellia_generic.c

@@ -13,8 +13,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*

+ 1 - 2
crypto/cast5_generic.c

@@ -16,8 +16,7 @@
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 

+ 1 - 2
crypto/cast6_generic.c

@@ -13,8 +13,7 @@
  * any later version.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 

+ 13 - 20
crypto/chacha20_generic.c

@@ -9,44 +9,38 @@
  * (at your option) any later version.
  */
 
+#include <asm/unaligned.h>
 #include <crypto/algapi.h>
 #include <crypto/chacha20.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
-static inline u32 le32_to_cpuvp(const void *p)
-{
-	return le32_to_cpup(p);
-}
-
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 			     unsigned int bytes)
 {
-	u8 stream[CHACHA20_BLOCK_SIZE];
+	u32 stream[CHACHA20_BLOCK_WORDS];
 
 	if (dst != src)
 		memcpy(dst, src, bytes);
 
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
+		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
 		bytes -= CHACHA20_BLOCK_SIZE;
 		dst += CHACHA20_BLOCK_SIZE;
 	}
 	if (bytes) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, stream, bytes);
+		crypto_xor(dst, (const u8 *)stream, bytes);
 	}
 }
 
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 {
-	static const char constant[16] = "expand 32-byte k";
-
-	state[0]  = le32_to_cpuvp(constant +  0);
-	state[1]  = le32_to_cpuvp(constant +  4);
-	state[2]  = le32_to_cpuvp(constant +  8);
-	state[3]  = le32_to_cpuvp(constant + 12);
+	state[0]  = 0x61707865; /* "expa" */
+	state[1]  = 0x3320646e; /* "nd 3" */
+	state[2]  = 0x79622d32; /* "2-by" */
+	state[3]  = 0x6b206574; /* "te k" */
 	state[4]  = ctx->key[0];
 	state[5]  = ctx->key[1];
 	state[6]  = ctx->key[2];
@@ -55,10 +49,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 	state[9]  = ctx->key[5];
 	state[10] = ctx->key[6];
 	state[11] = ctx->key[7];
-	state[12] = le32_to_cpuvp(iv +  0);
-	state[13] = le32_to_cpuvp(iv +  4);
-	state[14] = le32_to_cpuvp(iv +  8);
-	state[15] = le32_to_cpuvp(iv + 12);
+	state[12] = get_unaligned_le32(iv +  0);
+	state[13] = get_unaligned_le32(iv +  4);
+	state[14] = get_unaligned_le32(iv +  8);
+	state[15] = get_unaligned_le32(iv + 12);
 }
 EXPORT_SYMBOL_GPL(crypto_chacha20_init);
 
@@ -72,7 +66,7 @@ int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32));
+		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
 
 	return 0;
 }
@@ -111,7 +105,6 @@ static struct skcipher_alg alg = {
 	.base.cra_priority	= 100,
 	.base.cra_blocksize	= 1,
 	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_alignmask	= sizeof(u32) - 1,
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= CHACHA20_KEY_SIZE,

+ 1 - 0
crypto/crc32_generic.c

@@ -133,6 +133,7 @@ static struct shash_alg alg = {
 		.cra_name		= "crc32",
 		.cra_driver_name	= "crc32-generic",
 		.cra_priority		= 100,
+		.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		= CHKSUM_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(u32),
 		.cra_module		= THIS_MODULE,

+ 1 - 0
crypto/crc32c_generic.c

@@ -146,6 +146,7 @@ static struct shash_alg alg = {
 		.cra_name		=	"crc32c",
 		.cra_driver_name	=	"crc32c-generic",
 		.cra_priority		=	100,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
 		.cra_alignmask		=	3,
 		.cra_ctxsize		=	sizeof(struct chksum_ctx),

+ 10 - 7
crypto/cryptd.c

@@ -32,7 +32,9 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#define CRYPTD_MAX_CPU_QLEN 1000
+static unsigned int cryptd_max_cpu_qlen = 1000;
+module_param(cryptd_max_cpu_qlen, uint, 0);
+MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth");
 
 struct cryptd_cpu_queue {
 	struct crypto_queue queue;
@@ -116,6 +118,7 @@ static int cryptd_init_queue(struct cryptd_queue *queue,
 		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
 		INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
 	}
+	pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
 	return 0;
 }
 
@@ -893,10 +896,9 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	if (err)
 		goto out_free_inst;
 
-	type = CRYPTO_ALG_ASYNC;
-	if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
-		type |= CRYPTO_ALG_INTERNAL;
-	inst->alg.halg.base.cra_flags = type;
+	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
+				   CRYPTO_ALG_OPTIONAL_KEY));
 
 	inst->alg.halg.digestsize = salg->digestsize;
 	inst->alg.halg.statesize = salg->statesize;
@@ -911,7 +913,8 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.finup  = cryptd_hash_finup_enqueue;
 	inst->alg.export = cryptd_hash_export;
 	inst->alg.import = cryptd_hash_import;
-	inst->alg.setkey = cryptd_hash_setkey;
+	if (crypto_shash_alg_has_setkey(salg))
+		inst->alg.setkey = cryptd_hash_setkey;
 	inst->alg.digest = cryptd_hash_digest_enqueue;
 
 	err = ahash_register_instance(tmpl, inst);
@@ -1372,7 +1375,7 @@ static int __init cryptd_init(void)
 {
 	int err;
 
-	err = cryptd_init_queue(&queue, CRYPTD_MAX_CPU_QLEN);
+	err = cryptd_init_queue(&queue, cryptd_max_cpu_qlen);
 	if (err)
 		return err;
 

+ 2 - 2
crypto/crypto_user.c

@@ -169,7 +169,7 @@ static int crypto_report_one(struct crypto_alg *alg,
 	ualg->cru_type = 0;
 	ualg->cru_mask = 0;
 	ualg->cru_flags = alg->cra_flags;
-	ualg->cru_refcnt = atomic_read(&alg->cra_refcnt);
+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
 
 	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
 		goto nla_put_failure;
@@ -387,7 +387,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto drop_alg;
 
 	err = -EBUSY;
-	if (atomic_read(&alg->cra_refcnt) > 2)
+	if (refcount_read(&alg->cra_refcnt) > 2)
 		goto drop_alg;
 
 	err = crypto_unregister_instance((struct crypto_instance *)alg);

+ 1 - 1
crypto/ecc.c

@@ -964,7 +964,7 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
 	 * DRBG with a security strength of 256.
 	 */
 	if (crypto_get_default_rng())
-		err = -EFAULT;
+		return -EFAULT;
 
 	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
 	crypto_put_default_rng();

+ 0 - 5
crypto/echainiv.c

@@ -118,8 +118,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 				struct rtattr **tb)
 {
 	struct aead_instance *inst;
-	struct crypto_aead_spawn *spawn;
-	struct aead_alg *alg;
 	int err;
 
 	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
@@ -127,9 +125,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	spawn = aead_instance_ctx(inst);
-	alg = crypto_spawn_aead_alg(spawn);
-
 	err = -EINVAL;
 	if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
 		goto free_inst;

+ 2 - 2
crypto/gcm.c

@@ -1101,7 +1101,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
 	if (IS_ERR(aead))
 		return PTR_ERR(aead);
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_aead;
@@ -1129,7 +1129,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm)
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_rfc4543_free(struct aead_instance *inst)

+ 0 - 2
crypto/gf128mul.c

@@ -160,8 +160,6 @@ void gf128mul_x8_ble(le128 *r, const le128 *x)
 {
 	u64 a = le64_to_cpu(x->a);
 	u64 b = le64_to_cpu(x->b);
-
-	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
 	u64 _tt = gf128mul_table_be[a >> 56];
 
 	r->a = cpu_to_le64((a << 8) | (b >> 56));

+ 0 - 6
crypto/ghash-generic.c

@@ -56,9 +56,6 @@ static int ghash_update(struct shash_desc *desc,
 	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	u8 *dst = dctx->buffer;
 
-	if (!ctx->gf128)
-		return -ENOKEY;
-
 	if (dctx->bytes) {
 		int n = min(srclen, dctx->bytes);
 		u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
@@ -111,9 +108,6 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	u8 *buf = dctx->buffer;
 
-	if (!ctx->gf128)
-		return -ENOKEY;
-
 	ghash_flush(ctx, dctx);
 	memcpy(dst, buf, GHASH_BLOCK_SIZE);
 

+ 2 - 6
crypto/internal.h

@@ -30,9 +30,6 @@
 enum {
 	CRYPTO_MSG_ALG_REQUEST,
 	CRYPTO_MSG_ALG_REGISTER,
-	CRYPTO_MSG_ALG_UNREGISTER,
-	CRYPTO_MSG_TMPL_REGISTER,
-	CRYPTO_MSG_TMPL_UNREGISTER,
 };
 
 struct crypto_instance;
@@ -78,7 +75,6 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm);
 
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
-struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
 void crypto_alg_tested(const char *name, int err);
 
 void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
@@ -106,13 +102,13 @@ int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
 
 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
-	atomic_inc(&alg->cra_refcnt);
+	refcount_inc(&alg->cra_refcnt);
 	return alg;
 }
 
 static inline void crypto_alg_put(struct crypto_alg *alg)
 {
-	if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
+	if (refcount_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
 		alg->cra_destroy(alg);
 }
 

+ 2 - 2
crypto/keywrap.c

@@ -188,7 +188,7 @@ static int crypto_kw_decrypt(struct blkcipher_desc *desc,
 	}
 
 	/* Perform authentication check */
-	if (block.A != cpu_to_be64(0xa6a6a6a6a6a6a6a6))
+	if (block.A != cpu_to_be64(0xa6a6a6a6a6a6a6a6ULL))
 		ret = -EBADMSG;
 
 	memzero_explicit(&block, sizeof(struct crypto_kw_block));
@@ -221,7 +221,7 @@ static int crypto_kw_encrypt(struct blkcipher_desc *desc,
 	 * Place the predefined IV into block A -- for encrypt, the caller
 	 * does not need to provide an IV, but he needs to fetch the final IV.
 	 */
-	block.A = cpu_to_be64(0xa6a6a6a6a6a6a6a6);
+	block.A = cpu_to_be64(0xa6a6a6a6a6a6a6a6ULL);
 
 	/*
 	 * src scatterlist is read-only. dst scatterlist is r/w. During the

+ 5 - 6
crypto/mcryptd.c

@@ -26,7 +26,6 @@
 #include <linux/sched.h>
 #include <linux/sched/stat.h>
 #include <linux/slab.h>
-#include <linux/hardirq.h>
 
 #define MCRYPTD_MAX_CPU_QLEN 100
 #define MCRYPTD_BATCH 9
@@ -517,10 +516,9 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	if (err)
 		goto out_free_inst;
 
-	type = CRYPTO_ALG_ASYNC;
-	if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
-		type |= CRYPTO_ALG_INTERNAL;
-	inst->alg.halg.base.cra_flags = type;
+	inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
+		(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
+				   CRYPTO_ALG_OPTIONAL_KEY));
 
 	inst->alg.halg.digestsize = halg->digestsize;
 	inst->alg.halg.statesize = halg->statesize;
@@ -535,7 +533,8 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
 	inst->alg.finup  = mcryptd_hash_finup_enqueue;
 	inst->alg.export = mcryptd_hash_export;
 	inst->alg.import = mcryptd_hash_import;
-	inst->alg.setkey = mcryptd_hash_setkey;
+	if (crypto_hash_alg_has_setkey(halg))
+		inst->alg.setkey = mcryptd_hash_setkey;
 	inst->alg.digest = mcryptd_hash_digest_enqueue;
 
 	err = ahash_register_instance(tmpl, inst);

+ 9 - 18
crypto/poly1305_generic.c

@@ -47,17 +47,6 @@ int crypto_poly1305_init(struct shash_desc *desc)
 }
 EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 
-int crypto_poly1305_setkey(struct crypto_shash *tfm,
-			   const u8 *key, unsigned int keylen)
-{
-	/* Poly1305 requires a unique key for each tag, which implies that
-	 * we can't set it on the tfm that gets accessed by multiple users
-	 * simultaneously. Instead we expect the key as the first 32 bytes in
-	 * the update() call. */
-	return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
-
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
@@ -76,6 +65,11 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
 	dctx->s[3] = get_unaligned_le32(key + 12);
 }
 
+/*
+ * Poly1305 requires a unique key for each tag, which implies that we can't set
+ * it on the tfm that gets accessed by multiple users simultaneously. Instead we
+ * expect the key as the first 32 bytes in the update() call.
+ */
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
 					const u8 *src, unsigned int srclen)
 {
@@ -210,7 +204,6 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update);
 int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 {
 	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
-	__le32 *mac = (__le32 *)dst;
 	u32 h0, h1, h2, h3, h4;
 	u32 g0, g1, g2, g3, g4;
 	u32 mask;
@@ -267,10 +260,10 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 	h3 = (h3 >> 18) | (h4 <<  8);
 
 	/* mac = (h + s) % (2^128) */
-	f = (f >> 32) + h0 + dctx->s[0]; mac[0] = cpu_to_le32(f);
-	f = (f >> 32) + h1 + dctx->s[1]; mac[1] = cpu_to_le32(f);
-	f = (f >> 32) + h2 + dctx->s[2]; mac[2] = cpu_to_le32(f);
-	f = (f >> 32) + h3 + dctx->s[3]; mac[3] = cpu_to_le32(f);
+	f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst +  0);
+	f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst +  4);
+	f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst +  8);
+	f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12);
 
 	return 0;
 }
@@ -281,14 +274,12 @@ static struct shash_alg poly1305_alg = {
 	.init		= crypto_poly1305_init,
 	.update		= crypto_poly1305_update,
 	.final		= crypto_poly1305_final,
-	.setkey		= crypto_poly1305_setkey,
 	.descsize	= sizeof(struct poly1305_desc_ctx),
 	.base		= {
 		.cra_name		= "poly1305",
 		.cra_driver_name	= "poly1305-generic",
 		.cra_priority		= 100,
 		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_alignmask		= sizeof(u32) - 1,
 		.cra_blocksize		= POLY1305_BLOCK_SIZE,
 		.cra_module		= THIS_MODULE,
 	},

+ 1 - 1
crypto/proc.c

@@ -46,7 +46,7 @@ static int c_show(struct seq_file *m, void *p)
 	seq_printf(m, "driver       : %s\n", alg->cra_driver_name);
 	seq_printf(m, "module       : %s\n", module_name(alg->cra_module));
 	seq_printf(m, "priority     : %d\n", alg->cra_priority);
-	seq_printf(m, "refcnt       : %d\n", atomic_read(&alg->cra_refcnt));
+	seq_printf(m, "refcnt       : %u\n", refcount_read(&alg->cra_refcnt));
 	seq_printf(m, "selftest     : %s\n",
 		   (alg->cra_flags & CRYPTO_ALG_TESTED) ?
 		   "passed" : "unknown");

+ 101 - 139
crypto/salsa20_generic.c

@@ -19,49 +19,19 @@
  *
  */
 
-#include <linux/init.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/salsa20.h>
 #include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/crypto.h>
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <crypto/algapi.h>
-#include <asm/byteorder.h>
 
-#define SALSA20_IV_SIZE        8U
-#define SALSA20_MIN_KEY_SIZE  16U
-#define SALSA20_MAX_KEY_SIZE  32U
-
-/*
- * Start of code taken from D. J. Bernstein's reference implementation.
- * With some modifications and optimizations made to suit our needs.
- */
-
-/*
-salsa20-ref.c version 20051118
-D. J. Bernstein
-Public domain.
-*/
-
-#define U32TO8_LITTLE(p, v) \
-	{ (p)[0] = (v >>  0) & 0xff; (p)[1] = (v >>  8) & 0xff; \
-	  (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; }
-#define U8TO32_LITTLE(p)   \
-	(((u32)((p)[0])      ) | ((u32)((p)[1]) <<  8) | \
-	 ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24)   )
-
-struct salsa20_ctx
-{
-	u32 input[16];
-};
-
-static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
+static void salsa20_block(u32 *state, __le32 *stream)
 {
 	u32 x[16];
 	int i;
 
-	memcpy(x, input, sizeof(x));
-	for (i = 20; i > 0; i -= 2) {
+	memcpy(x, state, sizeof(x));
+
+	for (i = 0; i < 20; i += 2) {
 		x[ 4] ^= rol32((x[ 0] + x[12]),  7);
 		x[ 8] ^= rol32((x[ 4] + x[ 0]),  9);
 		x[12] ^= rol32((x[ 8] + x[ 4]), 13);
@@ -95,145 +65,137 @@ static void salsa20_wordtobyte(u8 output[64], const u32 input[16])
 		x[14] ^= rol32((x[13] + x[12]), 13);
 		x[15] ^= rol32((x[14] + x[13]), 18);
 	}
-	for (i = 0; i < 16; ++i)
-		x[i] += input[i];
-	for (i = 0; i < 16; ++i)
-		U32TO8_LITTLE(output + 4 * i,x[i]);
-}
 
-static const char sigma[16] = "expand 32-byte k";
-static const char tau[16] = "expand 16-byte k";
+	for (i = 0; i < 16; i++)
+		stream[i] = cpu_to_le32(x[i] + state[i]);
+
+	if (++state[8] == 0)
+		state[9]++;
+}
 
-static void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, u32 kbytes)
+static void salsa20_docrypt(u32 *state, u8 *dst, const u8 *src,
+			    unsigned int bytes)
 {
-	const char *constants;
+	__le32 stream[SALSA20_BLOCK_SIZE / sizeof(__le32)];
 
-	ctx->input[1] = U8TO32_LITTLE(k + 0);
-	ctx->input[2] = U8TO32_LITTLE(k + 4);
-	ctx->input[3] = U8TO32_LITTLE(k + 8);
-	ctx->input[4] = U8TO32_LITTLE(k + 12);
-	if (kbytes == 32) { /* recommended */
-		k += 16;
-		constants = sigma;
-	} else { /* kbytes == 16 */
-		constants = tau;
+	if (dst != src)
+		memcpy(dst, src, bytes);
+
+	while (bytes >= SALSA20_BLOCK_SIZE) {
+		salsa20_block(state, stream);
+		crypto_xor(dst, (const u8 *)stream, SALSA20_BLOCK_SIZE);
+		bytes -= SALSA20_BLOCK_SIZE;
+		dst += SALSA20_BLOCK_SIZE;
+	}
+	if (bytes) {
+		salsa20_block(state, stream);
+		crypto_xor(dst, (const u8 *)stream, bytes);
 	}
-	ctx->input[11] = U8TO32_LITTLE(k + 0);
-	ctx->input[12] = U8TO32_LITTLE(k + 4);
-	ctx->input[13] = U8TO32_LITTLE(k + 8);
-	ctx->input[14] = U8TO32_LITTLE(k + 12);
-	ctx->input[0] = U8TO32_LITTLE(constants + 0);
-	ctx->input[5] = U8TO32_LITTLE(constants + 4);
-	ctx->input[10] = U8TO32_LITTLE(constants + 8);
-	ctx->input[15] = U8TO32_LITTLE(constants + 12);
 }
 
-static void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv)
+void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
+			 const u8 *iv)
 {
-	ctx->input[6] = U8TO32_LITTLE(iv + 0);
-	ctx->input[7] = U8TO32_LITTLE(iv + 4);
-	ctx->input[8] = 0;
-	ctx->input[9] = 0;
+	memcpy(state, ctx->initial_state, sizeof(ctx->initial_state));
+	state[6] = get_unaligned_le32(iv + 0);
+	state[7] = get_unaligned_le32(iv + 4);
 }
+EXPORT_SYMBOL_GPL(crypto_salsa20_init);
 
-static void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, u8 *dst,
-				  const u8 *src, unsigned int bytes)
+int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			  unsigned int keysize)
 {
-	u8 buf[64];
-
-	if (dst != src)
-		memcpy(dst, src, bytes);
-
-	while (bytes) {
-		salsa20_wordtobyte(buf, ctx->input);
-
-		ctx->input[8]++;
-		if (!ctx->input[8])
-			ctx->input[9]++;
+	static const char sigma[16] = "expand 32-byte k";
+	static const char tau[16] = "expand 16-byte k";
+	struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	const char *constants;
 
-		if (bytes <= 64) {
-			crypto_xor(dst, buf, bytes);
-			return;
-		}
+	if (keysize != SALSA20_MIN_KEY_SIZE &&
+	    keysize != SALSA20_MAX_KEY_SIZE)
+		return -EINVAL;
 
-		crypto_xor(dst, buf, 64);
-		bytes -= 64;
-		dst += 64;
+	ctx->initial_state[1] = get_unaligned_le32(key + 0);
+	ctx->initial_state[2] = get_unaligned_le32(key + 4);
+	ctx->initial_state[3] = get_unaligned_le32(key + 8);
+	ctx->initial_state[4] = get_unaligned_le32(key + 12);
+	if (keysize == 32) { /* recommended */
+		key += 16;
+		constants = sigma;
+	} else { /* keysize == 16 */
+		constants = tau;
 	}
-}
-
-/*
- * End of code taken from D. J. Bernstein's reference implementation.
- */
+	ctx->initial_state[11] = get_unaligned_le32(key + 0);
+	ctx->initial_state[12] = get_unaligned_le32(key + 4);
+	ctx->initial_state[13] = get_unaligned_le32(key + 8);
+	ctx->initial_state[14] = get_unaligned_le32(key + 12);
+	ctx->initial_state[0]  = get_unaligned_le32(constants + 0);
+	ctx->initial_state[5]  = get_unaligned_le32(constants + 4);
+	ctx->initial_state[10] = get_unaligned_le32(constants + 8);
+	ctx->initial_state[15] = get_unaligned_le32(constants + 12);
+
+	/* space for the nonce; it will be overridden for each request */
+	ctx->initial_state[6] = 0;
+	ctx->initial_state[7] = 0;
+
+	/* initial block number */
+	ctx->initial_state[8] = 0;
+	ctx->initial_state[9] = 0;
 
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
-		  unsigned int keysize)
-{
-	struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
-	salsa20_keysetup(ctx, key, keysize);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_salsa20_setkey);
 
-static int encrypt(struct blkcipher_desc *desc,
-		   struct scatterlist *dst, struct scatterlist *src,
-		   unsigned int nbytes)
+static int salsa20_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	u32 state[16];
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 64);
+	err = skcipher_walk_virt(&walk, req, true);
 
-	salsa20_ivsetup(ctx, walk.iv);
+	crypto_salsa20_init(state, ctx, walk.iv);
 
-	while (walk.nbytes >= 64) {
-		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-				      walk.src.virt.addr,
-				      walk.nbytes - (walk.nbytes % 64));
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
-	}
+	while (walk.nbytes > 0) {
+		unsigned int nbytes = walk.nbytes;
 
-	if (walk.nbytes) {
-		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-				      walk.src.virt.addr, walk.nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, walk.stride);
+
+		salsa20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+				nbytes);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
 	return err;
 }
 
-static struct crypto_alg alg = {
-	.cra_name           =   "salsa20",
-	.cra_driver_name    =   "salsa20-generic",
-	.cra_priority       =   100,
-	.cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_type           =   &crypto_blkcipher_type,
-	.cra_blocksize      =   1,
-	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
-	.cra_alignmask      =	3,
-	.cra_module         =   THIS_MODULE,
-	.cra_u              =   {
-		.blkcipher = {
-			.setkey         =   setkey,
-			.encrypt        =   encrypt,
-			.decrypt        =   encrypt,
-			.min_keysize    =   SALSA20_MIN_KEY_SIZE,
-			.max_keysize    =   SALSA20_MAX_KEY_SIZE,
-			.ivsize         =   SALSA20_IV_SIZE,
-		}
-	}
+static struct skcipher_alg alg = {
+	.base.cra_name		= "salsa20",
+	.base.cra_driver_name	= "salsa20-generic",
+	.base.cra_priority	= 100,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct salsa20_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= SALSA20_MIN_KEY_SIZE,
+	.max_keysize		= SALSA20_MAX_KEY_SIZE,
+	.ivsize			= SALSA20_IV_SIZE,
+	.chunksize		= SALSA20_BLOCK_SIZE,
+	.setkey			= crypto_salsa20_setkey,
+	.encrypt		= salsa20_crypt,
+	.decrypt		= salsa20_crypt,
 };
 
 static int __init salsa20_generic_mod_init(void)
 {
-	return crypto_register_alg(&alg);
+	return crypto_register_skcipher(&alg);
 }
 
 static void __exit salsa20_generic_mod_fini(void)
 {
-	crypto_unregister_alg(&alg);
+	crypto_unregister_skcipher(&alg);
 }
 
 module_init(salsa20_generic_mod_init);

+ 0 - 5
crypto/seqiv.c

@@ -144,8 +144,6 @@ static int seqiv_aead_decrypt(struct aead_request *req)
 static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct aead_instance *inst;
-	struct crypto_aead_spawn *spawn;
-	struct aead_alg *alg;
 	int err;
 
 	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
@@ -153,9 +151,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	spawn = aead_instance_ctx(inst);
-	alg = crypto_spawn_aead_alg(spawn);
-
 	err = -EINVAL;
 	if (inst->alg.ivsize != sizeof(u64))
 		goto free_inst;

+ 164 - 168
crypto/sha3_generic.c

@@ -5,6 +5,7 @@
  * http://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf
  *
  * SHA-3 code by Jeff Garzik <jeff@garzik.org>
+ *               Ard Biesheuvel <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -17,12 +18,10 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/sha3.h>
-#include <asm/byteorder.h>
+#include <asm/unaligned.h>
 
 #define KECCAK_ROUNDS 24
 
-#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
-
 static const u64 keccakf_rndc[24] = {
 	0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
 	0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
@@ -34,100 +33,133 @@ static const u64 keccakf_rndc[24] = {
 	0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
 };
 
-static const int keccakf_rotc[24] = {
-	1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
-	27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
-};
-
-static const int keccakf_piln[24] = {
-	10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
-	15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
-};
-
 /* update the state with given number of rounds */
 
-static void keccakf(u64 st[25])
+static void __attribute__((__optimize__("O3"))) keccakf(u64 st[25])
 {
-	int i, j, round;
-	u64 t, bc[5];
+	u64 t[5], tt, bc[5];
+	int round;
 
 	for (round = 0; round < KECCAK_ROUNDS; round++) {
 
 		/* Theta */
-		for (i = 0; i < 5; i++)
-			bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15]
-				^ st[i + 20];
-
-		for (i = 0; i < 5; i++) {
-			t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
-			for (j = 0; j < 25; j += 5)
-				st[j + i] ^= t;
-		}
+		bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
+		bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
+		bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
+		bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
+		bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
+
+		t[0] = bc[4] ^ rol64(bc[1], 1);
+		t[1] = bc[0] ^ rol64(bc[2], 1);
+		t[2] = bc[1] ^ rol64(bc[3], 1);
+		t[3] = bc[2] ^ rol64(bc[4], 1);
+		t[4] = bc[3] ^ rol64(bc[0], 1);
+
+		st[0] ^= t[0];
 
 		/* Rho Pi */
-		t = st[1];
-		for (i = 0; i < 24; i++) {
-			j = keccakf_piln[i];
-			bc[0] = st[j];
-			st[j] = ROTL64(t, keccakf_rotc[i]);
-			t = bc[0];
-		}
+		tt = st[1];
+		st[ 1] = rol64(st[ 6] ^ t[1], 44);
+		st[ 6] = rol64(st[ 9] ^ t[4], 20);
+		st[ 9] = rol64(st[22] ^ t[2], 61);
+		st[22] = rol64(st[14] ^ t[4], 39);
+		st[14] = rol64(st[20] ^ t[0], 18);
+		st[20] = rol64(st[ 2] ^ t[2], 62);
+		st[ 2] = rol64(st[12] ^ t[2], 43);
+		st[12] = rol64(st[13] ^ t[3], 25);
+		st[13] = rol64(st[19] ^ t[4],  8);
+		st[19] = rol64(st[23] ^ t[3], 56);
+		st[23] = rol64(st[15] ^ t[0], 41);
+		st[15] = rol64(st[ 4] ^ t[4], 27);
+		st[ 4] = rol64(st[24] ^ t[4], 14);
+		st[24] = rol64(st[21] ^ t[1],  2);
+		st[21] = rol64(st[ 8] ^ t[3], 55);
+		st[ 8] = rol64(st[16] ^ t[1], 45);
+		st[16] = rol64(st[ 5] ^ t[0], 36);
+		st[ 5] = rol64(st[ 3] ^ t[3], 28);
+		st[ 3] = rol64(st[18] ^ t[3], 21);
+		st[18] = rol64(st[17] ^ t[2], 15);
+		st[17] = rol64(st[11] ^ t[1], 10);
+		st[11] = rol64(st[ 7] ^ t[2],  6);
+		st[ 7] = rol64(st[10] ^ t[0],  3);
+		st[10] = rol64(    tt ^ t[1],  1);
 
 		/* Chi */
-		for (j = 0; j < 25; j += 5) {
-			for (i = 0; i < 5; i++)
-				bc[i] = st[j + i];
-			for (i = 0; i < 5; i++)
-				st[j + i] ^= (~bc[(i + 1) % 5]) &
-					     bc[(i + 2) % 5];
-		}
+		bc[ 0] = ~st[ 1] & st[ 2];
+		bc[ 1] = ~st[ 2] & st[ 3];
+		bc[ 2] = ~st[ 3] & st[ 4];
+		bc[ 3] = ~st[ 4] & st[ 0];
+		bc[ 4] = ~st[ 0] & st[ 1];
+		st[ 0] ^= bc[ 0];
+		st[ 1] ^= bc[ 1];
+		st[ 2] ^= bc[ 2];
+		st[ 3] ^= bc[ 3];
+		st[ 4] ^= bc[ 4];
+
+		bc[ 0] = ~st[ 6] & st[ 7];
+		bc[ 1] = ~st[ 7] & st[ 8];
+		bc[ 2] = ~st[ 8] & st[ 9];
+		bc[ 3] = ~st[ 9] & st[ 5];
+		bc[ 4] = ~st[ 5] & st[ 6];
+		st[ 5] ^= bc[ 0];
+		st[ 6] ^= bc[ 1];
+		st[ 7] ^= bc[ 2];
+		st[ 8] ^= bc[ 3];
+		st[ 9] ^= bc[ 4];
+
+		bc[ 0] = ~st[11] & st[12];
+		bc[ 1] = ~st[12] & st[13];
+		bc[ 2] = ~st[13] & st[14];
+		bc[ 3] = ~st[14] & st[10];
+		bc[ 4] = ~st[10] & st[11];
+		st[10] ^= bc[ 0];
+		st[11] ^= bc[ 1];
+		st[12] ^= bc[ 2];
+		st[13] ^= bc[ 3];
+		st[14] ^= bc[ 4];
+
+		bc[ 0] = ~st[16] & st[17];
+		bc[ 1] = ~st[17] & st[18];
+		bc[ 2] = ~st[18] & st[19];
+		bc[ 3] = ~st[19] & st[15];
+		bc[ 4] = ~st[15] & st[16];
+		st[15] ^= bc[ 0];
+		st[16] ^= bc[ 1];
+		st[17] ^= bc[ 2];
+		st[18] ^= bc[ 3];
+		st[19] ^= bc[ 4];
+
+		bc[ 0] = ~st[21] & st[22];
+		bc[ 1] = ~st[22] & st[23];
+		bc[ 2] = ~st[23] & st[24];
+		bc[ 3] = ~st[24] & st[20];
+		bc[ 4] = ~st[20] & st[21];
+		st[20] ^= bc[ 0];
+		st[21] ^= bc[ 1];
+		st[22] ^= bc[ 2];
+		st[23] ^= bc[ 3];
+		st[24] ^= bc[ 4];
 
 		/* Iota */
 		st[0] ^= keccakf_rndc[round];
 	}
 }
 
-static void sha3_init(struct sha3_state *sctx, unsigned int digest_sz)
-{
-	memset(sctx, 0, sizeof(*sctx));
-	sctx->md_len = digest_sz;
-	sctx->rsiz = 200 - 2 * digest_sz;
-	sctx->rsizw = sctx->rsiz / 8;
-}
-
-static int sha3_224_init(struct shash_desc *desc)
-{
-	struct sha3_state *sctx = shash_desc_ctx(desc);
-
-	sha3_init(sctx, SHA3_224_DIGEST_SIZE);
-	return 0;
-}
-
-static int sha3_256_init(struct shash_desc *desc)
+int crypto_sha3_init(struct shash_desc *desc)
 {
 	struct sha3_state *sctx = shash_desc_ctx(desc);
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
 
-	sha3_init(sctx, SHA3_256_DIGEST_SIZE);
-	return 0;
-}
-
-static int sha3_384_init(struct shash_desc *desc)
-{
-	struct sha3_state *sctx = shash_desc_ctx(desc);
-
-	sha3_init(sctx, SHA3_384_DIGEST_SIZE);
-	return 0;
-}
-
-static int sha3_512_init(struct shash_desc *desc)
-{
-	struct sha3_state *sctx = shash_desc_ctx(desc);
+	sctx->rsiz = 200 - 2 * digest_size;
+	sctx->rsizw = sctx->rsiz / 8;
+	sctx->partial = 0;
 
-	sha3_init(sctx, SHA3_512_DIGEST_SIZE);
+	memset(sctx->st, 0, sizeof(sctx->st));
 	return 0;
 }
+EXPORT_SYMBOL(crypto_sha3_init);
 
-static int sha3_update(struct shash_desc *desc, const u8 *data,
+int crypto_sha3_update(struct shash_desc *desc, const u8 *data,
 		       unsigned int len)
 {
 	struct sha3_state *sctx = shash_desc_ctx(desc);
@@ -149,7 +181,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
 			unsigned int i;
 
 			for (i = 0; i < sctx->rsizw; i++)
-				sctx->st[i] ^= ((u64 *) src)[i];
+				sctx->st[i] ^= get_unaligned_le64(src + 8 * i);
 			keccakf(sctx->st);
 
 			done += sctx->rsiz;
@@ -163,125 +195,89 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
 
 	return 0;
 }
+EXPORT_SYMBOL(crypto_sha3_update);
 
-static int sha3_final(struct shash_desc *desc, u8 *out)
+int crypto_sha3_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha3_state *sctx = shash_desc_ctx(desc);
 	unsigned int i, inlen = sctx->partial;
+	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+	__le64 *digest = (__le64 *)out;
 
 	sctx->buf[inlen++] = 0x06;
 	memset(sctx->buf + inlen, 0, sctx->rsiz - inlen);
 	sctx->buf[sctx->rsiz - 1] |= 0x80;
 
 	for (i = 0; i < sctx->rsizw; i++)
-		sctx->st[i] ^= ((u64 *) sctx->buf)[i];
+		sctx->st[i] ^= get_unaligned_le64(sctx->buf + 8 * i);
 
 	keccakf(sctx->st);
 
-	for (i = 0; i < sctx->rsizw; i++)
-		sctx->st[i] = cpu_to_le64(sctx->st[i]);
+	for (i = 0; i < digest_size / 8; i++)
+		put_unaligned_le64(sctx->st[i], digest++);
 
-	memcpy(out, sctx->st, sctx->md_len);
+	if (digest_size & 4)
+		put_unaligned_le32(sctx->st[i], (__le32 *)digest);
 
 	memset(sctx, 0, sizeof(*sctx));
 	return 0;
 }
-
-static struct shash_alg sha3_224 = {
-	.digestsize	=	SHA3_224_DIGEST_SIZE,
-	.init		=	sha3_224_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-224",
-		.cra_driver_name =	"sha3-224-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_224_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static struct shash_alg sha3_256 = {
-	.digestsize	=	SHA3_256_DIGEST_SIZE,
-	.init		=	sha3_256_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-256",
-		.cra_driver_name =	"sha3-256-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_256_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static struct shash_alg sha3_384 = {
-	.digestsize	=	SHA3_384_DIGEST_SIZE,
-	.init		=	sha3_384_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-384",
-		.cra_driver_name =	"sha3-384-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_384_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
-
-static struct shash_alg sha3_512 = {
-	.digestsize	=	SHA3_512_DIGEST_SIZE,
-	.init		=	sha3_512_init,
-	.update		=	sha3_update,
-	.final		=	sha3_final,
-	.descsize	=	sizeof(struct sha3_state),
-	.base		=	{
-		.cra_name	=	"sha3-512",
-		.cra_driver_name =	"sha3-512-generic",
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA3_512_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-};
+EXPORT_SYMBOL(crypto_sha3_final);
+
+static struct shash_alg algs[] = { {
+	.digestsize		= SHA3_224_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-224",
+	.base.cra_driver_name	= "sha3-224-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_224_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA3_256_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-256",
+	.base.cra_driver_name	= "sha3-256-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_256_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA3_384_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-384",
+	.base.cra_driver_name	= "sha3-384-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_384_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= SHA3_512_DIGEST_SIZE,
+	.init			= crypto_sha3_init,
+	.update			= crypto_sha3_update,
+	.final			= crypto_sha3_final,
+	.descsize		= sizeof(struct sha3_state),
+	.base.cra_name		= "sha3-512",
+	.base.cra_driver_name	= "sha3-512-generic",
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= SHA3_512_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+} };
 
 static int __init sha3_generic_mod_init(void)
 {
-	int ret;
-
-	ret = crypto_register_shash(&sha3_224);
-	if (ret < 0)
-		goto err_out;
-	ret = crypto_register_shash(&sha3_256);
-	if (ret < 0)
-		goto err_out_224;
-	ret = crypto_register_shash(&sha3_384);
-	if (ret < 0)
-		goto err_out_256;
-	ret = crypto_register_shash(&sha3_512);
-	if (ret < 0)
-		goto err_out_384;
-
-	return 0;
-
-err_out_384:
-	crypto_unregister_shash(&sha3_384);
-err_out_256:
-	crypto_unregister_shash(&sha3_256);
-err_out_224:
-	crypto_unregister_shash(&sha3_224);
-err_out:
-	return ret;
+	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 }
 
 static void __exit sha3_generic_mod_fini(void)
 {
-	crypto_unregister_shash(&sha3_224);
-	crypto_unregister_shash(&sha3_256);
-	crypto_unregister_shash(&sha3_384);
-	crypto_unregister_shash(&sha3_512);
+	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 }
 
 module_init(sha3_generic_mod_init);

+ 21 - 4
crypto/shash.c

@@ -58,11 +58,18 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
+	int err;
 
 	if ((unsigned long)key & alignmask)
-		return shash_setkey_unaligned(tfm, key, keylen);
+		err = shash_setkey_unaligned(tfm, key, keylen);
+	else
+		err = shash->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return shash->setkey(tfm, key, keylen);
+	crypto_shash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_shash_setkey);
 
@@ -181,6 +188,9 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 
+	if (crypto_shash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
+		return -ENOKEY;
+
 	if (((unsigned long)data | (unsigned long)out) & alignmask)
 		return shash_digest_unaligned(desc, data, len, out);
 
@@ -360,7 +370,8 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
 	crt->digest = shash_async_digest;
 	crt->setkey = shash_async_setkey;
 
-	crt->has_setkey = alg->setkey != shash_no_setkey;
+	crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) &
+				    CRYPTO_TFM_NEED_KEY);
 
 	if (alg->export)
 		crt->export = shash_async_export;
@@ -375,8 +386,14 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm)
 static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_shash *hash = __crypto_shash_cast(tfm);
+	struct shash_alg *alg = crypto_shash_alg(hash);
+
+	hash->descsize = alg->descsize;
+
+	if (crypto_shash_alg_has_setkey(alg) &&
+	    !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY))
+		crypto_shash_set_flags(hash, CRYPTO_TFM_NEED_KEY);
 
-	hash->descsize = crypto_shash_alg(hash)->descsize;
 	return 0;
 }
 

+ 1 - 3
crypto/simd.c

@@ -19,9 +19,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  */
 

+ 26 - 4
crypto/skcipher.c

@@ -598,8 +598,11 @@ static int skcipher_setkey_blkcipher(struct crypto_skcipher *tfm,
 	err = crypto_blkcipher_setkey(blkcipher, key, keylen);
 	crypto_skcipher_set_flags(tfm, crypto_blkcipher_get_flags(blkcipher) &
 				       CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
 
-	return err;
+	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 
 static int skcipher_crypt_blkcipher(struct skcipher_request *req,
@@ -674,6 +677,9 @@ static int crypto_init_skcipher_ops_blkcipher(struct crypto_tfm *tfm)
 	skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
 	skcipher->keysize = calg->cra_blkcipher.max_keysize;
 
+	if (skcipher->keysize)
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_NEED_KEY);
+
 	return 0;
 }
 
@@ -692,8 +698,11 @@ static int skcipher_setkey_ablkcipher(struct crypto_skcipher *tfm,
 	crypto_skcipher_set_flags(tfm,
 				  crypto_ablkcipher_get_flags(ablkcipher) &
 				  CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
 
-	return err;
+	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 
 static int skcipher_crypt_ablkcipher(struct skcipher_request *req,
@@ -767,6 +776,9 @@ static int crypto_init_skcipher_ops_ablkcipher(struct crypto_tfm *tfm)
 			    sizeof(struct ablkcipher_request);
 	skcipher->keysize = calg->cra_ablkcipher.max_keysize;
 
+	if (skcipher->keysize)
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_NEED_KEY);
+
 	return 0;
 }
 
@@ -796,6 +808,7 @@ static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 {
 	struct skcipher_alg *cipher = crypto_skcipher_alg(tfm);
 	unsigned long alignmask = crypto_skcipher_alignmask(tfm);
+	int err;
 
 	if (keylen < cipher->min_keysize || keylen > cipher->max_keysize) {
 		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -803,9 +816,15 @@ static int skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	}
 
 	if ((unsigned long)key & alignmask)
-		return skcipher_setkey_unaligned(tfm, key, keylen);
+		err = skcipher_setkey_unaligned(tfm, key, keylen);
+	else
+		err = cipher->setkey(tfm, key, keylen);
+
+	if (err)
+		return err;
 
-	return cipher->setkey(tfm, key, keylen);
+	crypto_skcipher_clear_flags(tfm, CRYPTO_TFM_NEED_KEY);
+	return 0;
 }
 
 static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm)
@@ -834,6 +853,9 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
 	skcipher->ivsize = alg->ivsize;
 	skcipher->keysize = alg->max_keysize;
 
+	if (skcipher->keysize)
+		crypto_skcipher_set_flags(skcipher, CRYPTO_TFM_NEED_KEY);
+
 	if (alg->exit)
 		skcipher->base.exit = crypto_skcipher_exit_tfm;
 

+ 980 - 105
crypto/tcrypt.c

@@ -67,6 +67,7 @@ static char *alg = NULL;
 static u32 type;
 static u32 mask;
 static int mode;
+static u32 num_mb = 8;
 static char *tvmem[TVMEMSIZE];
 
 static char *check[] = {
@@ -79,6 +80,66 @@ static char *check[] = {
 	NULL
 };
 
+static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
+static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 };
+
+#define XBUFSIZE 8
+#define MAX_IVLEN 32
+
+static int testmgr_alloc_buf(char *buf[XBUFSIZE])
+{
+	int i;
+
+	for (i = 0; i < XBUFSIZE; i++) {
+		buf[i] = (void *)__get_free_page(GFP_KERNEL);
+		if (!buf[i])
+			goto err_free_buf;
+	}
+
+	return 0;
+
+err_free_buf:
+	while (i-- > 0)
+		free_page((unsigned long)buf[i]);
+
+	return -ENOMEM;
+}
+
+static void testmgr_free_buf(char *buf[XBUFSIZE])
+{
+	int i;
+
+	for (i = 0; i < XBUFSIZE; i++)
+		free_page((unsigned long)buf[i]);
+}
+
+static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
+			 unsigned int buflen, const void *assoc,
+			 unsigned int aad_size)
+{
+	int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE;
+	int k, rem;
+
+	if (np > XBUFSIZE) {
+		rem = PAGE_SIZE;
+		np = XBUFSIZE;
+	} else {
+		rem = buflen % PAGE_SIZE;
+	}
+
+	sg_init_table(sg, np + 1);
+
+	sg_set_buf(&sg[0], assoc, aad_size);
+
+	if (rem)
+		np--;
+	for (k = 0; k < np; k++)
+		sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
+
+	if (rem)
+		sg_set_buf(&sg[k + 1], xbuf[k], rem);
+}
+
 static inline int do_one_aead_op(struct aead_request *req, int ret)
 {
 	struct crypto_wait *wait = req->base.data;
@@ -86,8 +147,44 @@ static inline int do_one_aead_op(struct aead_request *req, int ret)
 	return crypto_wait_req(ret, wait);
 }
 
-static int test_aead_jiffies(struct aead_request *req, int enc,
-				int blen, int secs)
+struct test_mb_aead_data {
+	struct scatterlist sg[XBUFSIZE];
+	struct scatterlist sgout[XBUFSIZE];
+	struct aead_request *req;
+	struct crypto_wait wait;
+	char *xbuf[XBUFSIZE];
+	char *xoutbuf[XBUFSIZE];
+	char *axbuf[XBUFSIZE];
+};
+
+static int do_mult_aead_op(struct test_mb_aead_data *data, int enc,
+				u32 num_mb)
+{
+	int i, rc[num_mb], err = 0;
+
+	/* Fire up a bunch of concurrent requests */
+	for (i = 0; i < num_mb; i++) {
+		if (enc == ENCRYPT)
+			rc[i] = crypto_aead_encrypt(data[i].req);
+		else
+			rc[i] = crypto_aead_decrypt(data[i].req);
+	}
+
+	/* Wait for all requests to finish */
+	for (i = 0; i < num_mb; i++) {
+		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
+
+		if (rc[i]) {
+			pr_info("concurrent request %d error %d\n", i, rc[i]);
+			err = rc[i];
+		}
+	}
+
+	return err;
+}
+
+static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc,
+				int blen, int secs, u32 num_mb)
 {
 	unsigned long start, end;
 	int bcount;
@@ -95,21 +192,18 @@ static int test_aead_jiffies(struct aead_request *req, int enc,
 
 	for (start = jiffies, end = start + secs * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		if (enc)
-			ret = do_one_aead_op(req, crypto_aead_encrypt(req));
-		else
-			ret = do_one_aead_op(req, crypto_aead_decrypt(req));
-
+		ret = do_mult_aead_op(data, enc, num_mb);
 		if (ret)
 			return ret;
 	}
 
-	printk("%d operations in %d seconds (%ld bytes)\n",
-	       bcount, secs, (long)bcount * blen);
+	pr_cont("%d operations in %d seconds (%ld bytes)\n",
+		bcount * num_mb, secs, (long)bcount * blen * num_mb);
 	return 0;
 }
 
-static int test_aead_cycles(struct aead_request *req, int enc, int blen)
+static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
+			       int blen, u32 num_mb)
 {
 	unsigned long cycles = 0;
 	int ret = 0;
@@ -117,11 +211,7 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		if (enc)
-			ret = do_one_aead_op(req, crypto_aead_encrypt(req));
-		else
-			ret = do_one_aead_op(req, crypto_aead_decrypt(req));
-
+		ret = do_mult_aead_op(data, enc, num_mb);
 		if (ret)
 			goto out;
 	}
@@ -131,10 +221,7 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 		cycles_t start, end;
 
 		start = get_cycles();
-		if (enc)
-			ret = do_one_aead_op(req, crypto_aead_encrypt(req));
-		else
-			ret = do_one_aead_op(req, crypto_aead_decrypt(req));
+		ret = do_mult_aead_op(data, enc, num_mb);
 		end = get_cycles();
 
 		if (ret)
@@ -145,64 +232,276 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 
 out:
 	if (ret == 0)
-		printk("1 operation in %lu cycles (%d bytes)\n",
-		       (cycles + 4) / 8, blen);
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / (8 * num_mb), blen);
 
 	return ret;
 }
 
-static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
-static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 };
+static void test_mb_aead_speed(const char *algo, int enc, int secs,
+			       struct aead_speed_template *template,
+			       unsigned int tcount, u8 authsize,
+			       unsigned int aad_size, u8 *keysize, u32 num_mb)
+{
+	struct test_mb_aead_data *data;
+	struct crypto_aead *tfm;
+	unsigned int i, j, iv_len;
+	const char *key;
+	const char *e;
+	void *assoc;
+	u32 *b_size;
+	char *iv;
+	int ret;
 
-#define XBUFSIZE 8
-#define MAX_IVLEN 32
 
-static int testmgr_alloc_buf(char *buf[XBUFSIZE])
-{
-	int i;
+	if (aad_size >= PAGE_SIZE) {
+		pr_err("associate data length (%u) too big\n", aad_size);
+		return;
+	}
 
-	for (i = 0; i < XBUFSIZE; i++) {
-		buf[i] = (void *)__get_free_page(GFP_KERNEL);
-		if (!buf[i])
-			goto err_free_buf;
+	iv = kzalloc(MAX_IVLEN, GFP_KERNEL);
+	if (!iv)
+		return;
+
+	if (enc == ENCRYPT)
+		e = "encryption";
+	else
+		e = "decryption";
+
+	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto out_free_iv;
+
+	tfm = crypto_alloc_aead(algo, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("failed to load transform for %s: %ld\n",
+			algo, PTR_ERR(tfm));
+		goto out_free_data;
 	}
 
-	return 0;
+	ret = crypto_aead_setauthsize(tfm, authsize);
 
-err_free_buf:
-	while (i-- > 0)
-		free_page((unsigned long)buf[i]);
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xbuf);
+			goto out_free_tfm;
+		}
 
-	return -ENOMEM;
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].axbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].axbuf);
+			goto out_free_xbuf;
+		}
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xoutbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xoutbuf);
+			goto out_free_axbuf;
+		}
+
+	for (i = 0; i < num_mb; ++i) {
+		data[i].req = aead_request_alloc(tfm, GFP_KERNEL);
+		if (!data[i].req) {
+			pr_err("alg: skcipher: Failed to allocate request for %s\n",
+			       algo);
+			while (i--)
+				aead_request_free(data[i].req);
+			goto out_free_xoutbuf;
+		}
+	}
+
+	for (i = 0; i < num_mb; ++i) {
+		crypto_init_wait(&data[i].wait);
+		aead_request_set_callback(data[i].req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  crypto_req_done, &data[i].wait);
+	}
+
+	pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+		get_driver_name(crypto_aead, tfm), e);
+
+	i = 0;
+	do {
+		b_size = aead_sizes;
+		do {
+			if (*b_size + authsize > XBUFSIZE * PAGE_SIZE) {
+				pr_err("template (%u) too big for buffer (%lu)\n",
+				       authsize + *b_size,
+				       XBUFSIZE * PAGE_SIZE);
+				goto out;
+			}
+
+			pr_info("test %u (%d bit key, %d byte blocks): ", i,
+				*keysize * 8, *b_size);
+
+			/* Set up tfm global state, i.e. the key */
+
+			memset(tvmem[0], 0xff, PAGE_SIZE);
+			key = tvmem[0];
+			for (j = 0; j < tcount; j++) {
+				if (template[j].klen == *keysize) {
+					key = template[j].key;
+					break;
+				}
+			}
+
+			crypto_aead_clear_flags(tfm, ~0);
+
+			ret = crypto_aead_setkey(tfm, key, *keysize);
+			if (ret) {
+				pr_err("setkey() failed flags=%x\n",
+				       crypto_aead_get_flags(tfm));
+				goto out;
+			}
+
+			iv_len = crypto_aead_ivsize(tfm);
+			if (iv_len)
+				memset(iv, 0xff, iv_len);
+
+			/* Now setup per request stuff, i.e. buffers */
+
+			for (j = 0; j < num_mb; ++j) {
+				struct test_mb_aead_data *cur = &data[j];
+
+				assoc = cur->axbuf[0];
+				memset(assoc, 0xff, aad_size);
+
+				sg_init_aead(cur->sg, cur->xbuf,
+					     *b_size + (enc ? 0 : authsize),
+					     assoc, aad_size);
+
+				sg_init_aead(cur->sgout, cur->xoutbuf,
+					     *b_size + (enc ? authsize : 0),
+					     assoc, aad_size);
+
+				aead_request_set_ad(cur->req, aad_size);
+
+				if (!enc) {
+
+					aead_request_set_crypt(cur->req,
+							       cur->sgout,
+							       cur->sg,
+							       *b_size, iv);
+					ret = crypto_aead_encrypt(cur->req);
+					ret = do_one_aead_op(cur->req, ret);
+
+					if (ret) {
+						pr_err("calculating auth failed failed (%d)\n",
+						       ret);
+						break;
+					}
+				}
+
+				aead_request_set_crypt(cur->req, cur->sg,
+						       cur->sgout, *b_size +
+						       (enc ? 0 : authsize),
+						       iv);
+
+			}
+
+			if (secs)
+				ret = test_mb_aead_jiffies(data, enc, *b_size,
+							   secs, num_mb);
+			else
+				ret = test_mb_aead_cycles(data, enc, *b_size,
+							  num_mb);
+
+			if (ret) {
+				pr_err("%s() failed return code=%d\n", e, ret);
+				break;
+			}
+			b_size++;
+			i++;
+		} while (*b_size);
+		keysize++;
+	} while (*keysize);
+
+out:
+	for (i = 0; i < num_mb; ++i)
+		aead_request_free(data[i].req);
+out_free_xoutbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].xoutbuf);
+out_free_axbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].axbuf);
+out_free_xbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].xbuf);
+out_free_tfm:
+	crypto_free_aead(tfm);
+out_free_data:
+	kfree(data);
+out_free_iv:
+	kfree(iv);
 }
 
-static void testmgr_free_buf(char *buf[XBUFSIZE])
+static int test_aead_jiffies(struct aead_request *req, int enc,
+				int blen, int secs)
 {
-	int i;
+	unsigned long start, end;
+	int bcount;
+	int ret;
 
-	for (i = 0; i < XBUFSIZE; i++)
-		free_page((unsigned long)buf[i]);
+	for (start = jiffies, end = start + secs * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		if (enc)
+			ret = do_one_aead_op(req, crypto_aead_encrypt(req));
+		else
+			ret = do_one_aead_op(req, crypto_aead_decrypt(req));
+
+		if (ret)
+			return ret;
+	}
+
+	printk("%d operations in %d seconds (%ld bytes)\n",
+	       bcount, secs, (long)bcount * blen);
+	return 0;
 }
 
-static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
-			unsigned int buflen)
+static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 {
-	int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE;
-	int k, rem;
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
 
-	if (np > XBUFSIZE) {
-		rem = PAGE_SIZE;
-		np = XBUFSIZE;
-	} else {
-		rem = buflen % PAGE_SIZE;
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		if (enc)
+			ret = do_one_aead_op(req, crypto_aead_encrypt(req));
+		else
+			ret = do_one_aead_op(req, crypto_aead_decrypt(req));
+
+		if (ret)
+			goto out;
 	}
 
-	sg_init_table(sg, np + 1);
-	np--;
-	for (k = 0; k < np; k++)
-		sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		if (enc)
+			ret = do_one_aead_op(req, crypto_aead_encrypt(req));
+		else
+			ret = do_one_aead_op(req, crypto_aead_decrypt(req));
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+
+out:
+	if (ret == 0)
+		printk("1 operation in %lu cycles (%d bytes)\n",
+		       (cycles + 4) / 8, blen);
 
-	sg_set_buf(&sg[k + 1], xbuf[k], rem);
+	return ret;
 }
 
 static void test_aead_speed(const char *algo, int enc, unsigned int secs,
@@ -316,19 +615,37 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 				goto out;
 			}
 
-			sg_init_aead(sg, xbuf,
-				    *b_size + (enc ? 0 : authsize));
+			sg_init_aead(sg, xbuf, *b_size + (enc ? 0 : authsize),
+				     assoc, aad_size);
 
 			sg_init_aead(sgout, xoutbuf,
-				    *b_size + (enc ? authsize : 0));
+				     *b_size + (enc ? authsize : 0), assoc,
+				     aad_size);
+
+			aead_request_set_ad(req, aad_size);
 
-			sg_set_buf(&sg[0], assoc, aad_size);
-			sg_set_buf(&sgout[0], assoc, aad_size);
+			if (!enc) {
+
+				/*
+				 * For decryption we need a proper auth so
+				 * we do the encryption path once with buffers
+				 * reversed (input <-> output) to calculate it
+				 */
+				aead_request_set_crypt(req, sgout, sg,
+						       *b_size, iv);
+				ret = do_one_aead_op(req,
+						     crypto_aead_encrypt(req));
+
+				if (ret) {
+					pr_err("calculating auth failed failed (%d)\n",
+					       ret);
+					break;
+				}
+			}
 
 			aead_request_set_crypt(req, sg, sgout,
 					       *b_size + (enc ? 0 : authsize),
 					       iv);
-			aead_request_set_ad(req, aad_size);
 
 			if (secs)
 				ret = test_aead_jiffies(req, enc, *b_size,
@@ -381,24 +698,98 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret)
 }
 
 struct test_mb_ahash_data {
-	struct scatterlist sg[TVMEMSIZE];
+	struct scatterlist sg[XBUFSIZE];
 	char result[64];
 	struct ahash_request *req;
 	struct crypto_wait wait;
 	char *xbuf[XBUFSIZE];
 };
 
-static void test_mb_ahash_speed(const char *algo, unsigned int sec,
-				struct hash_speed *speed)
+static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb)
+{
+	int i, rc[num_mb], err = 0;
+
+	/* Fire up a bunch of concurrent requests */
+	for (i = 0; i < num_mb; i++)
+		rc[i] = crypto_ahash_digest(data[i].req);
+
+	/* Wait for all requests to finish */
+	for (i = 0; i < num_mb; i++) {
+		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
+
+		if (rc[i]) {
+			pr_info("concurrent request %d error %d\n", i, rc[i]);
+			err = rc[i];
+		}
+	}
+
+	return err;
+}
+
+static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen,
+				 int secs, u32 num_mb)
+{
+	unsigned long start, end;
+	int bcount;
+	int ret;
+
+	for (start = jiffies, end = start + secs * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		ret = do_mult_ahash_op(data, num_mb);
+		if (ret)
+			return ret;
+	}
+
+	pr_cont("%d operations in %d seconds (%ld bytes)\n",
+		bcount * num_mb, secs, (long)bcount * blen * num_mb);
+	return 0;
+}
+
+static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
+				u32 num_mb)
+{
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		ret = do_mult_ahash_op(data, num_mb);
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		ret = do_mult_ahash_op(data, num_mb);
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+
+out:
+	if (ret == 0)
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / (8 * num_mb), blen);
+
+	return ret;
+}
+
+static void test_mb_ahash_speed(const char *algo, unsigned int secs,
+				struct hash_speed *speed, u32 num_mb)
 {
 	struct test_mb_ahash_data *data;
 	struct crypto_ahash *tfm;
-	unsigned long start, end;
-	unsigned long cycles;
 	unsigned int i, j, k;
 	int ret;
 
-	data = kzalloc(sizeof(*data) * 8, GFP_KERNEL);
+	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return;
 
@@ -409,7 +800,7 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 		goto free_data;
 	}
 
-	for (i = 0; i < 8; ++i) {
+	for (i = 0; i < num_mb; ++i) {
 		if (testmgr_alloc_buf(data[i].xbuf))
 			goto out;
 
@@ -424,7 +815,12 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 
 		ahash_request_set_callback(data[i].req, 0, crypto_req_done,
 					   &data[i].wait);
-		test_hash_sg_init(data[i].sg);
+
+		sg_init_table(data[i].sg, XBUFSIZE);
+		for (j = 0; j < XBUFSIZE; j++) {
+			sg_set_buf(data[i].sg + j, data[i].xbuf[j], PAGE_SIZE);
+			memset(data[i].xbuf[j], 0xff, PAGE_SIZE);
+		}
 	}
 
 	pr_info("\ntesting speed of multibuffer %s (%s)\n", algo,
@@ -435,16 +831,16 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 		if (speed[i].blen != speed[i].plen)
 			continue;
 
-		if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
+		if (speed[i].blen > XBUFSIZE * PAGE_SIZE) {
 			pr_err("template (%u) too big for tvmem (%lu)\n",
-			       speed[i].blen, TVMEMSIZE * PAGE_SIZE);
+			       speed[i].blen, XBUFSIZE * PAGE_SIZE);
 			goto out;
 		}
 
 		if (speed[i].klen)
 			crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
 
-		for (k = 0; k < 8; k++)
+		for (k = 0; k < num_mb; k++)
 			ahash_request_set_crypt(data[k].req, data[k].sg,
 						data[k].result, speed[i].blen);
 
@@ -453,34 +849,12 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 			i, speed[i].blen, speed[i].plen,
 			speed[i].blen / speed[i].plen);
 
-		start = get_cycles();
-
-		for (k = 0; k < 8; k++) {
-			ret = crypto_ahash_digest(data[k].req);
-			if (ret == -EINPROGRESS) {
-				ret = 0;
-				continue;
-			}
-
-			if (ret)
-				break;
-
-			crypto_req_done(&data[k].req->base, 0);
-		}
-
-		for (j = 0; j < k; j++) {
-			struct crypto_wait *wait = &data[j].wait;
-			int wait_ret;
-
-			wait_ret = crypto_wait_req(-EINPROGRESS, wait);
-			if (wait_ret)
-				ret = wait_ret;
-		}
+		if (secs)
+			ret = test_mb_ahash_jiffies(data, speed[i].blen, secs,
+						    num_mb);
+		else
+			ret = test_mb_ahash_cycles(data, speed[i].blen, num_mb);
 
-		end = get_cycles();
-		cycles = end - start;
-		pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
-			cycles, cycles / (8 * speed[i].blen));
 
 		if (ret) {
 			pr_err("At least one hashing failed ret=%d\n", ret);
@@ -489,10 +863,10 @@ static void test_mb_ahash_speed(const char *algo, unsigned int sec,
 	}
 
 out:
-	for (k = 0; k < 8; ++k)
+	for (k = 0; k < num_mb; ++k)
 		ahash_request_free(data[k].req);
 
-	for (k = 0; k < 8; ++k)
+	for (k = 0; k < num_mb; ++k)
 		testmgr_free_buf(data[k].xbuf);
 
 	crypto_free_ahash(tfm);
@@ -736,6 +1110,254 @@ static void test_hash_speed(const char *algo, unsigned int secs,
 	return test_ahash_speed_common(algo, secs, speed, CRYPTO_ALG_ASYNC);
 }
 
+struct test_mb_skcipher_data {
+	struct scatterlist sg[XBUFSIZE];
+	struct skcipher_request *req;
+	struct crypto_wait wait;
+	char *xbuf[XBUFSIZE];
+};
+
+static int do_mult_acipher_op(struct test_mb_skcipher_data *data, int enc,
+				u32 num_mb)
+{
+	int i, rc[num_mb], err = 0;
+
+	/* Fire up a bunch of concurrent requests */
+	for (i = 0; i < num_mb; i++) {
+		if (enc == ENCRYPT)
+			rc[i] = crypto_skcipher_encrypt(data[i].req);
+		else
+			rc[i] = crypto_skcipher_decrypt(data[i].req);
+	}
+
+	/* Wait for all requests to finish */
+	for (i = 0; i < num_mb; i++) {
+		rc[i] = crypto_wait_req(rc[i], &data[i].wait);
+
+		if (rc[i]) {
+			pr_info("concurrent request %d error %d\n", i, rc[i]);
+			err = rc[i];
+		}
+	}
+
+	return err;
+}
+
+static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc,
+				int blen, int secs, u32 num_mb)
+{
+	unsigned long start, end;
+	int bcount;
+	int ret;
+
+	for (start = jiffies, end = start + secs * HZ, bcount = 0;
+	     time_before(jiffies, end); bcount++) {
+		ret = do_mult_acipher_op(data, enc, num_mb);
+		if (ret)
+			return ret;
+	}
+
+	pr_cont("%d operations in %d seconds (%ld bytes)\n",
+		bcount * num_mb, secs, (long)bcount * blen * num_mb);
+	return 0;
+}
+
+static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
+			       int blen, u32 num_mb)
+{
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		ret = do_mult_acipher_op(data, enc, num_mb);
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		ret = do_mult_acipher_op(data, enc, num_mb);
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+
+out:
+	if (ret == 0)
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / (8 * num_mb), blen);
+
+	return ret;
+}
+
+static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
+				   struct cipher_speed_template *template,
+				   unsigned int tcount, u8 *keysize, u32 num_mb)
+{
+	struct test_mb_skcipher_data *data;
+	struct crypto_skcipher *tfm;
+	unsigned int i, j, iv_len;
+	const char *key;
+	const char *e;
+	u32 *b_size;
+	char iv[128];
+	int ret;
+
+	if (enc == ENCRYPT)
+		e = "encryption";
+	else
+		e = "decryption";
+
+	data = kcalloc(num_mb, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return;
+
+	tfm = crypto_alloc_skcipher(algo, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("failed to load transform for %s: %ld\n",
+			algo, PTR_ERR(tfm));
+		goto out_free_data;
+	}
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xbuf);
+			goto out_free_tfm;
+		}
+
+
+	for (i = 0; i < num_mb; ++i)
+		if (testmgr_alloc_buf(data[i].xbuf)) {
+			while (i--)
+				testmgr_free_buf(data[i].xbuf);
+			goto out_free_tfm;
+		}
+
+
+	for (i = 0; i < num_mb; ++i) {
+		data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL);
+		if (!data[i].req) {
+			pr_err("alg: skcipher: Failed to allocate request for %s\n",
+			       algo);
+			while (i--)
+				skcipher_request_free(data[i].req);
+			goto out_free_xbuf;
+		}
+	}
+
+	for (i = 0; i < num_mb; ++i) {
+		skcipher_request_set_callback(data[i].req,
+					      CRYPTO_TFM_REQ_MAY_BACKLOG,
+					      crypto_req_done, &data[i].wait);
+		crypto_init_wait(&data[i].wait);
+	}
+
+	pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+		get_driver_name(crypto_skcipher, tfm), e);
+
+	i = 0;
+	do {
+		b_size = block_sizes;
+		do {
+			if (*b_size > XBUFSIZE * PAGE_SIZE) {
+				pr_err("template (%u) too big for buffer (%lu)\n",
+				       *b_size, XBUFSIZE * PAGE_SIZE);
+				goto out;
+			}
+
+			pr_info("test %u (%d bit key, %d byte blocks): ", i,
+				*keysize * 8, *b_size);
+
+			/* Set up tfm global state, i.e. the key */
+
+			memset(tvmem[0], 0xff, PAGE_SIZE);
+			key = tvmem[0];
+			for (j = 0; j < tcount; j++) {
+				if (template[j].klen == *keysize) {
+					key = template[j].key;
+					break;
+				}
+			}
+
+			crypto_skcipher_clear_flags(tfm, ~0);
+
+			ret = crypto_skcipher_setkey(tfm, key, *keysize);
+			if (ret) {
+				pr_err("setkey() failed flags=%x\n",
+				       crypto_skcipher_get_flags(tfm));
+				goto out;
+			}
+
+			iv_len = crypto_skcipher_ivsize(tfm);
+			if (iv_len)
+				memset(&iv, 0xff, iv_len);
+
+			/* Now setup per request stuff, i.e. buffers */
+
+			for (j = 0; j < num_mb; ++j) {
+				struct test_mb_skcipher_data *cur = &data[j];
+				unsigned int k = *b_size;
+				unsigned int pages = DIV_ROUND_UP(k, PAGE_SIZE);
+				unsigned int p = 0;
+
+				sg_init_table(cur->sg, pages);
+
+				while (k > PAGE_SIZE) {
+					sg_set_buf(cur->sg + p, cur->xbuf[p],
+						   PAGE_SIZE);
+					memset(cur->xbuf[p], 0xff, PAGE_SIZE);
+					p++;
+					k -= PAGE_SIZE;
+				}
+
+				sg_set_buf(cur->sg + p, cur->xbuf[p], k);
+				memset(cur->xbuf[p], 0xff, k);
+
+				skcipher_request_set_crypt(cur->req, cur->sg,
+							   cur->sg, *b_size,
+							   iv);
+			}
+
+			if (secs)
+				ret = test_mb_acipher_jiffies(data, enc,
+							      *b_size, secs,
+							      num_mb);
+			else
+				ret = test_mb_acipher_cycles(data, enc,
+							     *b_size, num_mb);
+
+			if (ret) {
+				pr_err("%s() failed flags=%x\n", e,
+				       crypto_skcipher_get_flags(tfm));
+				break;
+			}
+			b_size++;
+			i++;
+		} while (*b_size);
+		keysize++;
+	} while (*keysize);
+
+out:
+	for (i = 0; i < num_mb; ++i)
+		skcipher_request_free(data[i].req);
+out_free_xbuf:
+	for (i = 0; i < num_mb; ++i)
+		testmgr_free_buf(data[i].xbuf);
+out_free_tfm:
+	crypto_free_skcipher(tfm);
+out_free_data:
+	kfree(data);
+}
+
 static inline int do_one_acipher_op(struct skcipher_request *req, int ret)
 {
 	struct crypto_wait *wait = req->base.data;
@@ -1557,16 +2179,24 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				NULL, 0, 16, 16, aead_speed_template_20);
 		test_aead_speed("gcm(aes)", ENCRYPT, sec,
 				NULL, 0, 16, 8, speed_template_16_24_32);
+		test_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec,
+				NULL, 0, 16, 16, aead_speed_template_20);
+		test_aead_speed("gcm(aes)", DECRYPT, sec,
+				NULL, 0, 16, 8, speed_template_16_24_32);
 		break;
 
 	case 212:
 		test_aead_speed("rfc4309(ccm(aes))", ENCRYPT, sec,
 				NULL, 0, 16, 16, aead_speed_template_19);
+		test_aead_speed("rfc4309(ccm(aes))", DECRYPT, sec,
+				NULL, 0, 16, 16, aead_speed_template_19);
 		break;
 
 	case 213:
 		test_aead_speed("rfc7539esp(chacha20,poly1305)", ENCRYPT, sec,
 				NULL, 0, 16, 8, aead_speed_template_36);
+		test_aead_speed("rfc7539esp(chacha20,poly1305)", DECRYPT, sec,
+				NULL, 0, 16, 8, aead_speed_template_36);
 		break;
 
 	case 214:
@@ -1574,6 +2204,33 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				  speed_template_32);
 		break;
 
+	case 215:
+		test_mb_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, NULL,
+				   0, 16, 16, aead_speed_template_20, num_mb);
+		test_mb_aead_speed("gcm(aes)", ENCRYPT, sec, NULL, 0, 16, 8,
+				   speed_template_16_24_32, num_mb);
+		test_mb_aead_speed("rfc4106(gcm(aes))", DECRYPT, sec, NULL,
+				   0, 16, 16, aead_speed_template_20, num_mb);
+		test_mb_aead_speed("gcm(aes)", DECRYPT, sec, NULL, 0, 16, 8,
+				   speed_template_16_24_32, num_mb);
+		break;
+
+	case 216:
+		test_mb_aead_speed("rfc4309(ccm(aes))", ENCRYPT, sec, NULL, 0,
+				   16, 16, aead_speed_template_19, num_mb);
+		test_mb_aead_speed("rfc4309(ccm(aes))", DECRYPT, sec, NULL, 0,
+				   16, 16, aead_speed_template_19, num_mb);
+		break;
+
+	case 217:
+		test_mb_aead_speed("rfc7539esp(chacha20,poly1305)", ENCRYPT,
+				   sec, NULL, 0, 16, 8, aead_speed_template_36,
+				   num_mb);
+		test_mb_aead_speed("rfc7539esp(chacha20,poly1305)", DECRYPT,
+				   sec, NULL, 0, 16, 8, aead_speed_template_36,
+				   num_mb);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -1778,19 +2435,23 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 422:
-		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sha1", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 423:
-		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sha256", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 424:
-		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sha512", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 425:
-		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template);
+		test_mb_ahash_speed("sm3", sec, generic_hash_speed_template,
+				    num_mb);
 		if (mode > 400 && mode < 500) break;
 		/* fall through */
 	case 499:
@@ -2008,6 +2669,218 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				   speed_template_8_32);
 		break;
 
+	case 600:
+		test_mb_skcipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ecb(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("lrw(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ofb(aes)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ofb(aes)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("rfc3686(ctr(aes))", ENCRYPT, sec, NULL,
+				       0, speed_template_20_28_36, num_mb);
+		test_mb_skcipher_speed("rfc3686(ctr(aes))", DECRYPT, sec, NULL,
+				       0, speed_template_20_28_36, num_mb);
+		break;
+
+	case 601:
+		test_mb_skcipher_speed("ecb(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("ecb(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cbc(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cbc(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cfb(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("cfb(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("ofb(des3_ede)", ENCRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		test_mb_skcipher_speed("ofb(des3_ede)", DECRYPT, sec,
+				       des3_speed_template, DES3_SPEED_VECTORS,
+				       speed_template_24, num_mb);
+		break;
+
+	case 602:
+		test_mb_skcipher_speed("ecb(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("ecb(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cbc(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cbc(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cfb(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("cfb(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("ofb(des)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		test_mb_skcipher_speed("ofb(des)", DECRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		break;
+
+	case 603:
+		test_mb_skcipher_speed("ecb(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ecb(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("lrw(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("lrw(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("xts(serpent)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(serpent)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		break;
+
+	case 604:
+		test_mb_skcipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("ctr(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_24_32, num_mb);
+		test_mb_skcipher_speed("lrw(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("lrw(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_40_48, num_mb);
+		test_mb_skcipher_speed("xts(twofish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48_64, num_mb);
+		test_mb_skcipher_speed("xts(twofish)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48_64, num_mb);
+		break;
+
+	case 605:
+		test_mb_skcipher_speed("ecb(arc4)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8, num_mb);
+		break;
+
+	case 606:
+		test_mb_skcipher_speed("ecb(cast5)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("ecb(cast5)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("cbc(cast5)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("cbc(cast5)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("ctr(cast5)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		test_mb_skcipher_speed("ctr(cast5)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_16, num_mb);
+		break;
+
+	case 607:
+		test_mb_skcipher_speed("ecb(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ecb(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("lrw(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("lrw(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("xts(cast6)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(cast6)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		break;
+
+	case 608:
+		test_mb_skcipher_speed("ecb(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ecb(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("cbc(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("ctr(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_16_32, num_mb);
+		test_mb_skcipher_speed("lrw(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("lrw(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_48, num_mb);
+		test_mb_skcipher_speed("xts(camellia)", ENCRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		test_mb_skcipher_speed("xts(camellia)", DECRYPT, sec, NULL, 0,
+				       speed_template_32_64, num_mb);
+		break;
+
+	case 609:
+		test_mb_skcipher_speed("ecb(blowfish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("ecb(blowfish)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("cbc(blowfish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("cbc(blowfish)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("ctr(blowfish)", ENCRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		test_mb_skcipher_speed("ctr(blowfish)", DECRYPT, sec, NULL, 0,
+				       speed_template_8_32, num_mb);
+		break;
+
 	case 1000:
 		test_available();
 		break;
@@ -2069,6 +2942,8 @@ module_param(mode, int, 0);
 module_param(sec, uint, 0);
 MODULE_PARM_DESC(sec, "Length in seconds of speed tests "
 		      "(defaults to zero which uses CPU cycles instead)");
+module_param(num_mb, uint, 0000);
+MODULE_PARM_DESC(num_mb, "Number of concurrent requests to be used in mb speed tests (defaults to 8)");
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Quick & dirty crypto testing module");

+ 40 - 1
crypto/testmgr.c

@@ -177,6 +177,18 @@ static void testmgr_free_buf(char *buf[XBUFSIZE])
 		free_page((unsigned long)buf[i]);
 }
 
+static int ahash_guard_result(char *result, char c, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (result[i] != c)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ahash_partial_update(struct ahash_request **preq,
 	struct crypto_ahash *tfm, const struct hash_testvec *template,
 	void *hash_buff, int k, int temp, struct scatterlist *sg,
@@ -185,7 +197,8 @@ static int ahash_partial_update(struct ahash_request **preq,
 	char *state;
 	struct ahash_request *req;
 	int statesize, ret = -EINVAL;
-	const char guard[] = { 0x00, 0xba, 0xad, 0x00 };
+	static const unsigned char guard[] = { 0x00, 0xba, 0xad, 0x00 };
+	int digestsize = crypto_ahash_digestsize(tfm);
 
 	req = *preq;
 	statesize = crypto_ahash_statesize(
@@ -196,12 +209,19 @@ static int ahash_partial_update(struct ahash_request **preq,
 		goto out_nostate;
 	}
 	memcpy(state + statesize, guard, sizeof(guard));
+	memset(result, 1, digestsize);
 	ret = crypto_ahash_export(req, state);
 	WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
 	if (ret) {
 		pr_err("alg: hash: Failed to export() for %s\n", algo);
 		goto out;
 	}
+	ret = ahash_guard_result(result, 1, digestsize);
+	if (ret) {
+		pr_err("alg: hash: Failed, export used req->result for %s\n",
+		       algo);
+		goto out;
+	}
 	ahash_request_free(req);
 	req = ahash_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {
@@ -221,6 +241,12 @@ static int ahash_partial_update(struct ahash_request **preq,
 		pr_err("alg: hash: Failed to import() for %s\n", algo);
 		goto out;
 	}
+	ret = ahash_guard_result(result, 1, digestsize);
+	if (ret) {
+		pr_err("alg: hash: Failed, import used req->result for %s\n",
+		       algo);
+		goto out;
+	}
 	ret = crypto_wait_req(crypto_ahash_update(req), wait);
 	if (ret)
 		goto out;
@@ -316,18 +342,31 @@ static int __test_hash(struct crypto_ahash *tfm,
 				goto out;
 			}
 		} else {
+			memset(result, 1, digest_size);
 			ret = crypto_wait_req(crypto_ahash_init(req), &wait);
 			if (ret) {
 				pr_err("alg: hash: init failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
+			ret = ahash_guard_result(result, 1, digest_size);
+			if (ret) {
+				pr_err("alg: hash: init failed on test %d "
+				       "for %s: used req->result\n", j, algo);
+				goto out;
+			}
 			ret = crypto_wait_req(crypto_ahash_update(req), &wait);
 			if (ret) {
 				pr_err("alg: hash: update failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 			}
+			ret = ahash_guard_result(result, 1, digest_size);
+			if (ret) {
+				pr_err("alg: hash: update failed on test %d "
+				       "for %s: used req->result\n", j, algo);
+				goto out;
+			}
 			ret = crypto_wait_req(crypto_ahash_final(req), &wait);
 			if (ret) {
 				pr_err("alg: hash: final failed on test %d "

+ 550 - 0
crypto/testmgr.h

@@ -1052,6 +1052,142 @@ static const struct hash_testvec sha3_224_tv_template[] = {
 				"\xc9\xfd\x55\x74\x49\x44\x79\xba"
 				"\x5c\x7e\x7a\xb7\x6e\xf2\x64\xea"
 				"\xd0\xfc\xce\x33",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\x7d\x0f\x2f\xb7\x65\x3b\xa7\x26"
+			     "\xc3\x88\x20\x71\x15\x06\xe8\x2d"
+			     "\xa3\x92\x44\xab\x3e\xe7\xff\x86"
+			     "\xb6\x79\x10\x72",
 	},
 };
 
@@ -1077,6 +1213,142 @@ static const struct hash_testvec sha3_256_tv_template[] = {
 				"\x49\x10\x03\x76\xa8\x23\x5e\x2c"
 				"\x82\xe1\xb9\x99\x8a\x99\x9e\x21"
 				"\xdb\x32\xdd\x97\x49\x6d\x33\x76",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\xde\x41\x04\xbd\xda\xda\xd9\x71"
+			     "\xf7\xfa\x80\xf5\xea\x11\x03\xb1"
+			     "\x3b\x6a\xbc\x5f\xb9\x66\x26\xf7"
+			     "\x8a\x97\xbb\xf2\x07\x08\x38\x30",
 	},
 };
 
@@ -1109,6 +1381,144 @@ static const struct hash_testvec sha3_384_tv_template[] = {
 				"\x9b\xfd\xbc\x32\xb9\xd4\xad\x5a"
 				"\xa0\x4a\x1f\x07\x6e\x62\xfe\xa1"
 				"\x9e\xef\x51\xac\xd0\x65\x7c\x22",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\x1b\x19\x4d\x8f\xd5\x36\x87\x71"
+			     "\xcf\xca\x30\x85\x9b\xc1\x25\xc7"
+			     "\x00\xcb\x73\x8a\x8e\xd4\xfe\x2b"
+			     "\x1a\xa2\xdc\x2e\x41\xfd\x52\x51"
+			     "\xd2\x21\xae\x2d\xc7\xae\x8c\x40"
+			     "\xb9\xe6\x56\x48\x03\xcd\x88\x6b",
 	},
 };
 
@@ -1147,6 +1557,146 @@ static const struct hash_testvec sha3_512_tv_template[] = {
 				"\xba\x1b\x0d\x8d\xc7\x8c\x08\x63"
 				"\x46\xb5\x33\xb4\x9c\x03\x0d\x99"
 				"\xa2\x7d\xaf\x11\x39\xd6\xe7\x5e",
+		.np	= 2,
+		.tap	= { 28, 28 },
+	}, {
+		.plaintext = "\x08\x9f\x13\xaa\x41\xd8\x4c\xe3"
+			     "\x7a\x11\x85\x1c\xb3\x27\xbe\x55"
+			     "\xec\x60\xf7\x8e\x02\x99\x30\xc7"
+			     "\x3b\xd2\x69\x00\x74\x0b\xa2\x16"
+			     "\xad\x44\xdb\x4f\xe6\x7d\x14\x88"
+			     "\x1f\xb6\x2a\xc1\x58\xef\x63\xfa"
+			     "\x91\x05\x9c\x33\xca\x3e\xd5\x6c"
+			     "\x03\x77\x0e\xa5\x19\xb0\x47\xde"
+			     "\x52\xe9\x80\x17\x8b\x22\xb9\x2d"
+			     "\xc4\x5b\xf2\x66\xfd\x94\x08\x9f"
+			     "\x36\xcd\x41\xd8\x6f\x06\x7a\x11"
+			     "\xa8\x1c\xb3\x4a\xe1\x55\xec\x83"
+			     "\x1a\x8e\x25\xbc\x30\xc7\x5e\xf5"
+			     "\x69\x00\x97\x0b\xa2\x39\xd0\x44"
+			     "\xdb\x72\x09\x7d\x14\xab\x1f\xb6"
+			     "\x4d\xe4\x58\xef\x86\x1d\x91\x28"
+			     "\xbf\x33\xca\x61\xf8\x6c\x03\x9a"
+			     "\x0e\xa5\x3c\xd3\x47\xde\x75\x0c"
+			     "\x80\x17\xae\x22\xb9\x50\xe7\x5b"
+			     "\xf2\x89\x20\x94\x2b\xc2\x36\xcd"
+			     "\x64\xfb\x6f\x06\x9d\x11\xa8\x3f"
+			     "\xd6\x4a\xe1\x78\x0f\x83\x1a\xb1"
+			     "\x25\xbc\x53\xea\x5e\xf5\x8c\x00"
+			     "\x97\x2e\xc5\x39\xd0\x67\xfe\x72"
+			     "\x09\xa0\x14\xab\x42\xd9\x4d\xe4"
+			     "\x7b\x12\x86\x1d\xb4\x28\xbf\x56"
+			     "\xed\x61\xf8\x8f\x03\x9a\x31\xc8"
+			     "\x3c\xd3\x6a\x01\x75\x0c\xa3\x17"
+			     "\xae\x45\xdc\x50\xe7\x7e\x15\x89"
+			     "\x20\xb7\x2b\xc2\x59\xf0\x64\xfb"
+			     "\x92\x06\x9d\x34\xcb\x3f\xd6\x6d"
+			     "\x04\x78\x0f\xa6\x1a\xb1\x48\xdf"
+			     "\x53\xea\x81\x18\x8c\x23\xba\x2e"
+			     "\xc5\x5c\xf3\x67\xfe\x95\x09\xa0"
+			     "\x37\xce\x42\xd9\x70\x07\x7b\x12"
+			     "\xa9\x1d\xb4\x4b\xe2\x56\xed\x84"
+			     "\x1b\x8f\x26\xbd\x31\xc8\x5f\xf6"
+			     "\x6a\x01\x98\x0c\xa3\x3a\xd1\x45"
+			     "\xdc\x73\x0a\x7e\x15\xac\x20\xb7"
+			     "\x4e\xe5\x59\xf0\x87\x1e\x92\x29"
+			     "\xc0\x34\xcb\x62\xf9\x6d\x04\x9b"
+			     "\x0f\xa6\x3d\xd4\x48\xdf\x76\x0d"
+			     "\x81\x18\xaf\x23\xba\x51\xe8\x5c"
+			     "\xf3\x8a\x21\x95\x2c\xc3\x37\xce"
+			     "\x65\xfc\x70\x07\x9e\x12\xa9\x40"
+			     "\xd7\x4b\xe2\x79\x10\x84\x1b\xb2"
+			     "\x26\xbd\x54\xeb\x5f\xf6\x8d\x01"
+			     "\x98\x2f\xc6\x3a\xd1\x68\xff\x73"
+			     "\x0a\xa1\x15\xac\x43\xda\x4e\xe5"
+			     "\x7c\x13\x87\x1e\xb5\x29\xc0\x57"
+			     "\xee\x62\xf9\x90\x04\x9b\x32\xc9"
+			     "\x3d\xd4\x6b\x02\x76\x0d\xa4\x18"
+			     "\xaf\x46\xdd\x51\xe8\x7f\x16\x8a"
+			     "\x21\xb8\x2c\xc3\x5a\xf1\x65\xfc"
+			     "\x93\x07\x9e\x35\xcc\x40\xd7\x6e"
+			     "\x05\x79\x10\xa7\x1b\xb2\x49\xe0"
+			     "\x54\xeb\x82\x19\x8d\x24\xbb\x2f"
+			     "\xc6\x5d\xf4\x68\xff\x96\x0a\xa1"
+			     "\x38\xcf\x43\xda\x71\x08\x7c\x13"
+			     "\xaa\x1e\xb5\x4c\xe3\x57\xee\x85"
+			     "\x1c\x90\x27\xbe\x32\xc9\x60\xf7"
+			     "\x6b\x02\x99\x0d\xa4\x3b\xd2\x46"
+			     "\xdd\x74\x0b\x7f\x16\xad\x21\xb8"
+			     "\x4f\xe6\x5a\xf1\x88\x1f\x93\x2a"
+			     "\xc1\x35\xcc\x63\xfa\x6e\x05\x9c"
+			     "\x10\xa7\x3e\xd5\x49\xe0\x77\x0e"
+			     "\x82\x19\xb0\x24\xbb\x52\xe9\x5d"
+			     "\xf4\x8b\x22\x96\x2d\xc4\x38\xcf"
+			     "\x66\xfd\x71\x08\x9f\x13\xaa\x41"
+			     "\xd8\x4c\xe3\x7a\x11\x85\x1c\xb3"
+			     "\x27\xbe\x55\xec\x60\xf7\x8e\x02"
+			     "\x99\x30\xc7\x3b\xd2\x69\x00\x74"
+			     "\x0b\xa2\x16\xad\x44\xdb\x4f\xe6"
+			     "\x7d\x14\x88\x1f\xb6\x2a\xc1\x58"
+			     "\xef\x63\xfa\x91\x05\x9c\x33\xca"
+			     "\x3e\xd5\x6c\x03\x77\x0e\xa5\x19"
+			     "\xb0\x47\xde\x52\xe9\x80\x17\x8b"
+			     "\x22\xb9\x2d\xc4\x5b\xf2\x66\xfd"
+			     "\x94\x08\x9f\x36\xcd\x41\xd8\x6f"
+			     "\x06\x7a\x11\xa8\x1c\xb3\x4a\xe1"
+			     "\x55\xec\x83\x1a\x8e\x25\xbc\x30"
+			     "\xc7\x5e\xf5\x69\x00\x97\x0b\xa2"
+			     "\x39\xd0\x44\xdb\x72\x09\x7d\x14"
+			     "\xab\x1f\xb6\x4d\xe4\x58\xef\x86"
+			     "\x1d\x91\x28\xbf\x33\xca\x61\xf8"
+			     "\x6c\x03\x9a\x0e\xa5\x3c\xd3\x47"
+			     "\xde\x75\x0c\x80\x17\xae\x22\xb9"
+			     "\x50\xe7\x5b\xf2\x89\x20\x94\x2b"
+			     "\xc2\x36\xcd\x64\xfb\x6f\x06\x9d"
+			     "\x11\xa8\x3f\xd6\x4a\xe1\x78\x0f"
+			     "\x83\x1a\xb1\x25\xbc\x53\xea\x5e"
+			     "\xf5\x8c\x00\x97\x2e\xc5\x39\xd0"
+			     "\x67\xfe\x72\x09\xa0\x14\xab\x42"
+			     "\xd9\x4d\xe4\x7b\x12\x86\x1d\xb4"
+			     "\x28\xbf\x56\xed\x61\xf8\x8f\x03"
+			     "\x9a\x31\xc8\x3c\xd3\x6a\x01\x75"
+			     "\x0c\xa3\x17\xae\x45\xdc\x50\xe7"
+			     "\x7e\x15\x89\x20\xb7\x2b\xc2\x59"
+			     "\xf0\x64\xfb\x92\x06\x9d\x34\xcb"
+			     "\x3f\xd6\x6d\x04\x78\x0f\xa6\x1a"
+			     "\xb1\x48\xdf\x53\xea\x81\x18\x8c"
+			     "\x23\xba\x2e\xc5\x5c\xf3\x67\xfe"
+			     "\x95\x09\xa0\x37\xce\x42\xd9\x70"
+			     "\x07\x7b\x12\xa9\x1d\xb4\x4b\xe2"
+			     "\x56\xed\x84\x1b\x8f\x26\xbd\x31"
+			     "\xc8\x5f\xf6\x6a\x01\x98\x0c\xa3"
+			     "\x3a\xd1\x45\xdc\x73\x0a\x7e\x15"
+			     "\xac\x20\xb7\x4e\xe5\x59\xf0\x87"
+			     "\x1e\x92\x29\xc0\x34\xcb\x62\xf9"
+			     "\x6d\x04\x9b\x0f\xa6\x3d\xd4\x48"
+			     "\xdf\x76\x0d\x81\x18\xaf\x23\xba"
+			     "\x51\xe8\x5c\xf3\x8a\x21\x95\x2c"
+			     "\xc3\x37\xce\x65\xfc\x70\x07\x9e"
+			     "\x12\xa9\x40\xd7\x4b\xe2\x79\x10"
+			     "\x84\x1b\xb2\x26\xbd\x54\xeb\x5f"
+			     "\xf6\x8d\x01\x98\x2f\xc6\x3a\xd1"
+			     "\x68\xff\x73\x0a\xa1\x15\xac\x43"
+			     "\xda\x4e\xe5\x7c\x13\x87\x1e\xb5"
+			     "\x29\xc0\x57\xee\x62\xf9\x90\x04"
+			     "\x9b\x32\xc9\x3d\xd4\x6b\x02\x76"
+			     "\x0d\xa4\x18\xaf\x46\xdd\x51\xe8"
+			     "\x7f\x16\x8a\x21\xb8\x2c\xc3\x5a"
+			     "\xf1\x65\xfc\x93\x07\x9e\x35\xcc"
+			     "\x40\xd7\x6e\x05\x79\x10\xa7\x1b"
+			     "\xb2\x49\xe0\x54\xeb\x82\x19\x8d"
+			     "\x24\xbb\x2f\xc6\x5d\xf4\x68\xff"
+			     "\x96\x0a\xa1\x38\xcf\x43\xda\x71"
+			     "\x08\x7c\x13\xaa\x1e\xb5\x4c",
+		.psize     = 1023,
+		.digest    = "\x59\xda\x30\xe3\x90\xe4\x3d\xde"
+			     "\xf0\xc6\x42\x17\xd7\xb2\x26\x47"
+			     "\x90\x28\xa6\x84\xe8\x49\x7a\x86"
+			     "\xd6\xb8\x9e\xf8\x07\x59\x21\x03"
+			     "\xad\xd2\xed\x48\xa3\xb9\xa5\xf0"
+			     "\xb3\xae\x02\x2b\xb8\xaf\xc3\x3b"
+			     "\xd6\xb0\x8f\xcb\x76\x8b\xa7\x41"
+			     "\x32\xc2\x8e\x50\x91\x86\x90\xfb",
 	},
 };
 

+ 2 - 3
crypto/twofish_common.c

@@ -24,9 +24,8 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,

+ 2 - 3
crypto/twofish_generic.c

@@ -23,9 +23,8 @@
  * GNU General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,

+ 1 - 2
crypto/xcbc.c

@@ -12,8 +12,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  * Author:
  * 	Kazunori Miyazawa <miyazawa@linux-ipv6.org>

+ 16 - 16
drivers/char/hw_random/Kconfig

@@ -73,26 +73,14 @@ config HW_RANDOM_ATMEL
 
 	  If unsure, say Y.
 
-config HW_RANDOM_BCM63XX
-	tristate "Broadcom BCM63xx Random Number Generator support"
-	depends on BCM63XX || BMIPS_GENERIC
-	default HW_RANDOM
-	---help---
-	  This driver provides kernel-side support for the Random Number
-	  Generator hardware found on the Broadcom BCM63xx SoCs.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called bcm63xx-rng
-
-	  If unusure, say Y.
-
 config HW_RANDOM_BCM2835
-	tristate "Broadcom BCM2835 Random Number Generator support"
-	depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X
+	tristate "Broadcom BCM2835/BCM63xx Random Number Generator support"
+	depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X || \
+		   ARCH_BCM_63XX || BCM63XX || BMIPS_GENERIC
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the Random Number
-	  Generator hardware found on the Broadcom BCM2835 SoCs.
+	  Generator hardware found on the Broadcom BCM2835 and BCM63xx SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called bcm2835-rng
@@ -436,6 +424,18 @@ config HW_RANDOM_S390
 
 	  If unsure, say Y.
 
+config HW_RANDOM_EXYNOS
+	tristate "Samsung Exynos True Random Number Generator support"
+	depends on ARCH_EXYNOS || COMPILE_TEST
+	default HW_RANDOM
+	---help---
+	  This driver provides support for the True Random Number
+	  Generator available in Exynos SoCs.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called exynos-trng.
+
+	  If unsure, say Y.
 endif # HW_RANDOM
 
 config UML_RANDOM

+ 1 - 1
drivers/char/hw_random/Makefile

@@ -9,11 +9,11 @@ obj-$(CONFIG_HW_RANDOM_TIMERIOMEM) += timeriomem-rng.o
 obj-$(CONFIG_HW_RANDOM_INTEL) += intel-rng.o
 obj-$(CONFIG_HW_RANDOM_AMD) += amd-rng.o
 obj-$(CONFIG_HW_RANDOM_ATMEL) += atmel-rng.o
-obj-$(CONFIG_HW_RANDOM_BCM63XX)	+= bcm63xx-rng.o
 obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o
 obj-$(CONFIG_HW_RANDOM_N2RNG) += n2-rng.o
 n2-rng-y := n2-drv.o n2-asm.o
 obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o
+obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-trng.o
 obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o
 obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o
 obj-$(CONFIG_HW_RANDOM_OMAP3_ROM) += omap3-rom-rng.o

+ 117 - 52
drivers/char/hw_random/bcm2835-rng.c

@@ -15,6 +15,7 @@
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/clk.h>
 
 #define RNG_CTRL	0x0
 #define RNG_STATUS	0x4
@@ -29,116 +30,180 @@
 
 #define RNG_INT_OFF	0x1
 
-static void __init nsp_rng_init(void __iomem *base)
+struct bcm2835_rng_priv {
+	struct hwrng rng;
+	void __iomem *base;
+	bool mask_interrupts;
+	struct clk *clk;
+};
+
+static inline struct bcm2835_rng_priv *to_rng_priv(struct hwrng *rng)
 {
-	u32 val;
+	return container_of(rng, struct bcm2835_rng_priv, rng);
+}
+
+static inline u32 rng_readl(struct bcm2835_rng_priv *priv, u32 offset)
+{
+	/* MIPS chips strapped for BE will automagically configure the
+	 * peripheral registers for CPU-native byte order.
+	 */
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		return __raw_readl(priv->base + offset);
+	else
+		return readl(priv->base + offset);
+}
 
-	/* mask the interrupt */
-	val = readl(base + RNG_INT_MASK);
-	val |= RNG_INT_OFF;
-	writel(val, base + RNG_INT_MASK);
+static inline void rng_writel(struct bcm2835_rng_priv *priv, u32 val,
+			      u32 offset)
+{
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		__raw_writel(val, priv->base + offset);
+	else
+		writel(val, priv->base + offset);
 }
 
 static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max,
 			       bool wait)
 {
-	void __iomem *rng_base = (void __iomem *)rng->priv;
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
 	u32 max_words = max / sizeof(u32);
 	u32 num_words, count;
 
-	while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) {
+	while ((rng_readl(priv, RNG_STATUS) >> 24) == 0) {
 		if (!wait)
 			return 0;
 		cpu_relax();
 	}
 
-	num_words = readl(rng_base + RNG_STATUS) >> 24;
+	num_words = rng_readl(priv, RNG_STATUS) >> 24;
 	if (num_words > max_words)
 		num_words = max_words;
 
 	for (count = 0; count < num_words; count++)
-		((u32 *)buf)[count] = readl(rng_base + RNG_DATA);
+		((u32 *)buf)[count] = rng_readl(priv, RNG_DATA);
 
 	return num_words * sizeof(u32);
 }
 
-static struct hwrng bcm2835_rng_ops = {
-	.name	= "bcm2835",
-	.read	= bcm2835_rng_read,
+static int bcm2835_rng_init(struct hwrng *rng)
+{
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
+	int ret = 0;
+	u32 val;
+
+	if (!IS_ERR(priv->clk)) {
+		ret = clk_prepare_enable(priv->clk);
+		if (ret)
+			return ret;
+	}
+
+	if (priv->mask_interrupts) {
+		/* mask the interrupt */
+		val = rng_readl(priv, RNG_INT_MASK);
+		val |= RNG_INT_OFF;
+		rng_writel(priv, val, RNG_INT_MASK);
+	}
+
+	/* set warm-up count & enable */
+	rng_writel(priv, RNG_WARMUP_COUNT, RNG_STATUS);
+	rng_writel(priv, RNG_RBGEN, RNG_CTRL);
+
+	return ret;
+}
+
+static void bcm2835_rng_cleanup(struct hwrng *rng)
+{
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
+
+	/* disable rng hardware */
+	rng_writel(priv, 0, RNG_CTRL);
+
+	if (!IS_ERR(priv->clk))
+		clk_disable_unprepare(priv->clk);
+}
+
+struct bcm2835_rng_of_data {
+	bool mask_interrupts;
+};
+
+static const struct bcm2835_rng_of_data nsp_rng_of_data = {
+	.mask_interrupts = true,
 };
 
 static const struct of_device_id bcm2835_rng_of_match[] = {
 	{ .compatible = "brcm,bcm2835-rng"},
-	{ .compatible = "brcm,bcm-nsp-rng", .data = nsp_rng_init},
-	{ .compatible = "brcm,bcm5301x-rng", .data = nsp_rng_init},
+	{ .compatible = "brcm,bcm-nsp-rng", .data = &nsp_rng_of_data },
+	{ .compatible = "brcm,bcm5301x-rng", .data = &nsp_rng_of_data },
+	{ .compatible = "brcm,bcm6368-rng"},
 	{},
 };
 
 static int bcm2835_rng_probe(struct platform_device *pdev)
 {
+	const struct bcm2835_rng_of_data *of_data;
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	void (*rng_setup)(void __iomem *base);
 	const struct of_device_id *rng_id;
-	void __iomem *rng_base;
+	struct bcm2835_rng_priv *priv;
+	struct resource *r;
 	int err;
 
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
 	/* map peripheral */
-	rng_base = of_iomap(np, 0);
-	if (!rng_base) {
-		dev_err(dev, "failed to remap rng regs");
-		return -ENODEV;
-	}
-	bcm2835_rng_ops.priv = (unsigned long)rng_base;
+	priv->base = devm_ioremap_resource(dev, r);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	/* Clock is optional on most platforms */
+	priv->clk = devm_clk_get(dev, NULL);
+
+	priv->rng.name = pdev->name;
+	priv->rng.init = bcm2835_rng_init;
+	priv->rng.read = bcm2835_rng_read;
+	priv->rng.cleanup = bcm2835_rng_cleanup;
 
 	rng_id = of_match_node(bcm2835_rng_of_match, np);
-	if (!rng_id) {
-		iounmap(rng_base);
+	if (!rng_id)
 		return -EINVAL;
-	}
-	/* Check for rng init function, execute it */
-	rng_setup = rng_id->data;
-	if (rng_setup)
-		rng_setup(rng_base);
 
-	/* set warm-up count & enable */
-	__raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
-	__raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
+	/* Check for rng init function, execute it */
+	of_data = rng_id->data;
+	if (of_data)
+		priv->mask_interrupts = of_data->mask_interrupts;
 
 	/* register driver */
-	err = hwrng_register(&bcm2835_rng_ops);
-	if (err) {
+	err = devm_hwrng_register(dev, &priv->rng);
+	if (err)
 		dev_err(dev, "hwrng registration failed\n");
-		iounmap(rng_base);
-	} else
+	else
 		dev_info(dev, "hwrng registered\n");
 
 	return err;
 }
 
-static int bcm2835_rng_remove(struct platform_device *pdev)
-{
-	void __iomem *rng_base = (void __iomem *)bcm2835_rng_ops.priv;
-
-	/* disable rng hardware */
-	__raw_writel(0, rng_base + RNG_CTRL);
-
-	/* unregister driver */
-	hwrng_unregister(&bcm2835_rng_ops);
-	iounmap(rng_base);
-
-	return 0;
-}
-
 MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match);
 
+static struct platform_device_id bcm2835_rng_devtype[] = {
+	{ .name = "bcm2835-rng" },
+	{ .name = "bcm63xx-rng" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, bcm2835_rng_devtype);
+
 static struct platform_driver bcm2835_rng_driver = {
 	.driver = {
 		.name = "bcm2835-rng",
 		.of_match_table = bcm2835_rng_of_match,
 	},
 	.probe		= bcm2835_rng_probe,
-	.remove		= bcm2835_rng_remove,
+	.id_table	= bcm2835_rng_devtype,
 };
 module_platform_driver(bcm2835_rng_driver);
 

+ 0 - 154
drivers/char/hw_random/bcm63xx-rng.c

@@ -1,154 +0,0 @@
-/*
- * Broadcom BCM63xx Random Number Generator support
- *
- * Copyright (C) 2011, Florian Fainelli <florian@openwrt.org>
- * Copyright (C) 2009, Broadcom Corporation
- *
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/hw_random.h>
-#include <linux/of.h>
-
-#define RNG_CTRL			0x00
-#define RNG_EN				(1 << 0)
-
-#define RNG_STAT			0x04
-#define RNG_AVAIL_MASK			(0xff000000)
-
-#define RNG_DATA			0x08
-#define RNG_THRES			0x0c
-#define RNG_MASK			0x10
-
-struct bcm63xx_rng_priv {
-	struct hwrng rng;
-	struct clk *clk;
-	void __iomem *regs;
-};
-
-#define to_rng_priv(rng)	container_of(rng, struct bcm63xx_rng_priv, rng)
-
-static int bcm63xx_rng_init(struct hwrng *rng)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-	u32 val;
-	int error;
-
-	error = clk_prepare_enable(priv->clk);
-	if (error)
-		return error;
-
-	val = __raw_readl(priv->regs + RNG_CTRL);
-	val |= RNG_EN;
-	__raw_writel(val, priv->regs + RNG_CTRL);
-
-	return 0;
-}
-
-static void bcm63xx_rng_cleanup(struct hwrng *rng)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-	u32 val;
-
-	val = __raw_readl(priv->regs + RNG_CTRL);
-	val &= ~RNG_EN;
-	__raw_writel(val, priv->regs + RNG_CTRL);
-
-	clk_disable_unprepare(priv->clk);
-}
-
-static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
-	return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
-}
-
-static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
-	*data = __raw_readl(priv->regs + RNG_DATA);
-
-	return 4;
-}
-
-static int bcm63xx_rng_probe(struct platform_device *pdev)
-{
-	struct resource *r;
-	int ret;
-	struct bcm63xx_rng_priv *priv;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
-		dev_err(&pdev->dev, "no iomem resource\n");
-		return -ENXIO;
-	}
-
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->rng.name = pdev->name;
-	priv->rng.init = bcm63xx_rng_init;
-	priv->rng.cleanup = bcm63xx_rng_cleanup;
-	priv->rng.data_present = bcm63xx_rng_data_present;
-	priv->rng.data_read = bcm63xx_rng_data_read;
-
-	priv->clk = devm_clk_get(&pdev->dev, "ipsec");
-	if (IS_ERR(priv->clk)) {
-		ret = PTR_ERR(priv->clk);
-		dev_err(&pdev->dev, "no clock for device: %d\n", ret);
-		return ret;
-	}
-
-	if (!devm_request_mem_region(&pdev->dev, r->start,
-					resource_size(r), pdev->name)) {
-		dev_err(&pdev->dev, "request mem failed");
-		return -EBUSY;
-	}
-
-	priv->regs = devm_ioremap_nocache(&pdev->dev, r->start,
-					resource_size(r));
-	if (!priv->regs) {
-		dev_err(&pdev->dev, "ioremap failed");
-		return -ENOMEM;
-	}
-
-	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to register rng device: %d\n",
-			ret);
-		return ret;
-	}
-
-	dev_info(&pdev->dev, "registered RNG driver\n");
-
-	return 0;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id bcm63xx_rng_of_match[] = {
-	{ .compatible = "brcm,bcm6368-rng", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, bcm63xx_rng_of_match);
-#endif
-
-static struct platform_driver bcm63xx_rng_driver = {
-	.probe		= bcm63xx_rng_probe,
-	.driver		= {
-		.name	= "bcm63xx-rng",
-		.of_match_table = of_match_ptr(bcm63xx_rng_of_match),
-	},
-};
-
-module_platform_driver(bcm63xx_rng_driver);
-
-MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
-MODULE_DESCRIPTION("Broadcom BCM63xx RNG driver");
-MODULE_LICENSE("GPL");

+ 4 - 0
drivers/char/hw_random/core.c

@@ -306,6 +306,10 @@ static int enable_best_rng(void)
 		ret = ((new_rng == current_rng) ? 0 : set_current_rng(new_rng));
 		if (!ret)
 			cur_rng_set_by_user = 0;
+	} else {
+		drop_current_rng();
+		cur_rng_set_by_user = 0;
+		ret = 0;
 	}
 
 	return ret;

+ 235 - 0
drivers/char/hw_random/exynos-trng.c

@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RNG driver for Exynos TRNGs
+ *
+ * Author: Łukasz Stelmach <l.stelmach@samsung.com>
+ *
+ * Copyright 2017 (c) Samsung Electronics Software, Inc.
+ *
+ * Based on the Exynos PRNG driver drivers/crypto/exynos-rng by
+ * Krzysztof Kozłowski <krzk@kernel.org>
+ */
+
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#define EXYNOS_TRNG_CLKDIV         (0x0)
+
+#define EXYNOS_TRNG_CTRL           (0x20)
+#define EXYNOS_TRNG_CTRL_RNGEN     BIT(31)
+
+#define EXYNOS_TRNG_POST_CTRL      (0x30)
+#define EXYNOS_TRNG_ONLINE_CTRL    (0x40)
+#define EXYNOS_TRNG_ONLINE_STAT    (0x44)
+#define EXYNOS_TRNG_ONLINE_MAXCHI2 (0x48)
+#define EXYNOS_TRNG_FIFO_CTRL      (0x50)
+#define EXYNOS_TRNG_FIFO_0         (0x80)
+#define EXYNOS_TRNG_FIFO_1         (0x84)
+#define EXYNOS_TRNG_FIFO_2         (0x88)
+#define EXYNOS_TRNG_FIFO_3         (0x8c)
+#define EXYNOS_TRNG_FIFO_4         (0x90)
+#define EXYNOS_TRNG_FIFO_5         (0x94)
+#define EXYNOS_TRNG_FIFO_6         (0x98)
+#define EXYNOS_TRNG_FIFO_7         (0x9c)
+#define EXYNOS_TRNG_FIFO_LEN       (8)
+#define EXYNOS_TRNG_CLOCK_RATE     (500000)
+
+
+struct exynos_trng_dev {
+	struct device    *dev;
+	void __iomem     *mem;
+	struct clk       *clk;
+	struct hwrng rng;
+};
+
+static int exynos_trng_do_read(struct hwrng *rng, void *data, size_t max,
+			       bool wait)
+{
+	struct exynos_trng_dev *trng;
+	int val;
+
+	max = min_t(size_t, max, (EXYNOS_TRNG_FIFO_LEN * 4));
+
+	trng = (struct exynos_trng_dev *)rng->priv;
+
+	writel_relaxed(max * 8, trng->mem + EXYNOS_TRNG_FIFO_CTRL);
+	val = readl_poll_timeout(trng->mem + EXYNOS_TRNG_FIFO_CTRL, val,
+				 val == 0, 200, 1000000);
+	if (val < 0)
+		return val;
+
+	memcpy_fromio(data, trng->mem + EXYNOS_TRNG_FIFO_0, max);
+
+	return max;
+}
+
+static int exynos_trng_init(struct hwrng *rng)
+{
+	struct exynos_trng_dev *trng = (struct exynos_trng_dev *)rng->priv;
+	unsigned long sss_rate;
+	u32 val;
+
+	sss_rate = clk_get_rate(trng->clk);
+
+	/*
+	 * For most TRNG circuits the clock frequency of under 500 kHz
+	 * is safe.
+	 */
+	val = sss_rate / (EXYNOS_TRNG_CLOCK_RATE * 2);
+	if (val > 0x7fff) {
+		dev_err(trng->dev, "clock divider too large: %d", val);
+		return -ERANGE;
+	}
+	val = val << 1;
+	writel_relaxed(val, trng->mem + EXYNOS_TRNG_CLKDIV);
+
+	/* Enable the generator. */
+	val = EXYNOS_TRNG_CTRL_RNGEN;
+	writel_relaxed(val, trng->mem + EXYNOS_TRNG_CTRL);
+
+	/*
+	 * Disable post-processing. /dev/hwrng is supposed to deliver
+	 * unprocessed data.
+	 */
+	writel_relaxed(0, trng->mem + EXYNOS_TRNG_POST_CTRL);
+
+	return 0;
+}
+
+static int exynos_trng_probe(struct platform_device *pdev)
+{
+	struct exynos_trng_dev *trng;
+	struct resource *res;
+	int ret = -ENOMEM;
+
+	trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL);
+	if (!trng)
+		return ret;
+
+	trng->rng.name = devm_kstrdup(&pdev->dev, dev_name(&pdev->dev),
+				      GFP_KERNEL);
+	if (!trng->rng.name)
+		return ret;
+
+	trng->rng.init = exynos_trng_init;
+	trng->rng.read = exynos_trng_do_read;
+	trng->rng.priv = (unsigned long) trng;
+
+	platform_set_drvdata(pdev, trng);
+	trng->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	trng->mem = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(trng->mem))
+		return PTR_ERR(trng->mem);
+
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Could not get runtime PM.\n");
+		goto err_pm_get;
+	}
+
+	trng->clk = devm_clk_get(&pdev->dev, "secss");
+	if (IS_ERR(trng->clk)) {
+		ret = PTR_ERR(trng->clk);
+		dev_err(&pdev->dev, "Could not get clock.\n");
+		goto err_clock;
+	}
+
+	ret = clk_prepare_enable(trng->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Could not enable the clk.\n");
+		goto err_clock;
+	}
+
+	ret = hwrng_register(&trng->rng);
+	if (ret) {
+		dev_err(&pdev->dev, "Could not register hwrng device.\n");
+		goto err_register;
+	}
+
+	dev_info(&pdev->dev, "Exynos True Random Number Generator.\n");
+
+	return 0;
+
+err_register:
+	clk_disable_unprepare(trng->clk);
+
+err_clock:
+	pm_runtime_put_sync(&pdev->dev);
+
+err_pm_get:
+	pm_runtime_disable(&pdev->dev);
+
+	return ret;
+}
+
+static int exynos_trng_remove(struct platform_device *pdev)
+{
+	struct exynos_trng_dev *trng =  platform_get_drvdata(pdev);
+
+	hwrng_unregister(&trng->rng);
+	clk_disable_unprepare(trng->clk);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static int __maybe_unused exynos_trng_suspend(struct device *dev)
+{
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int __maybe_unused exynos_trng_resume(struct device *dev)
+{
+	int ret;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "Could not get runtime PM.\n");
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(exynos_trng_pm_ops, exynos_trng_suspend,
+			 exynos_trng_resume);
+
+static const struct of_device_id exynos_trng_dt_match[] = {
+	{
+		.compatible = "samsung,exynos5250-trng",
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, exynos_trng_dt_match);
+
+static struct platform_driver exynos_trng_driver = {
+	.driver = {
+		.name = "exynos-trng",
+		.pm = &exynos_trng_pm_ops,
+		.of_match_table = exynos_trng_dt_match,
+	},
+	.probe = exynos_trng_probe,
+	.remove = exynos_trng_remove,
+};
+
+module_platform_driver(exynos_trng_driver);
+MODULE_AUTHOR("Łukasz Stelmach");
+MODULE_DESCRIPTION("H/W TRNG driver for Exynos chips");
+MODULE_LICENSE("GPL v2");

+ 3 - 10
drivers/char/hw_random/imx-rngc.c

@@ -282,8 +282,7 @@ static int __exit imx_rngc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int imx_rngc_suspend(struct device *dev)
+static int __maybe_unused imx_rngc_suspend(struct device *dev)
 {
 	struct imx_rngc *rngc = dev_get_drvdata(dev);
 
@@ -292,7 +291,7 @@ static int imx_rngc_suspend(struct device *dev)
 	return 0;
 }
 
-static int imx_rngc_resume(struct device *dev)
+static int __maybe_unused imx_rngc_resume(struct device *dev)
 {
 	struct imx_rngc *rngc = dev_get_drvdata(dev);
 
@@ -301,11 +300,7 @@ static int imx_rngc_resume(struct device *dev)
 	return 0;
 }
 
-static const struct dev_pm_ops imx_rngc_pm_ops = {
-	.suspend	= imx_rngc_suspend,
-	.resume		= imx_rngc_resume,
-};
-#endif
+SIMPLE_DEV_PM_OPS(imx_rngc_pm_ops, imx_rngc_suspend, imx_rngc_resume);
 
 static const struct of_device_id imx_rngc_dt_ids[] = {
 	{ .compatible = "fsl,imx25-rngb", .data = NULL, },
@@ -316,9 +311,7 @@ MODULE_DEVICE_TABLE(of, imx_rngc_dt_ids);
 static struct platform_driver imx_rngc_driver = {
 	.driver = {
 		.name = "imx_rngc",
-#ifdef CONFIG_PM
 		.pm = &imx_rngc_pm_ops,
-#endif
 		.of_match_table = imx_rngc_dt_ids,
 	},
 	.remove = __exit_p(imx_rngc_remove),

+ 1 - 0
drivers/char/hw_random/mtk-rng.c

@@ -135,6 +135,7 @@ static int mtk_rng_probe(struct platform_device *pdev)
 #endif
 	priv->rng.read = mtk_rng_read;
 	priv->rng.priv = (unsigned long)&pdev->dev;
+	priv->rng.quality = 900;
 
 	priv->clk = devm_clk_get(&pdev->dev, "rng");
 	if (IS_ERR(priv->clk)) {

+ 12 - 12
drivers/char/random.c

@@ -431,9 +431,9 @@ static int crng_init = 0;
 static int crng_init_cnt = 0;
 #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE]);
+			  __u32 out[CHACHA20_BLOCK_WORDS]);
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
+				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -817,7 +817,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 	unsigned long	flags;
 	int		i, num;
 	union {
-		__u8	block[CHACHA20_BLOCK_SIZE];
+		__u32	block[CHACHA20_BLOCK_WORDS];
 		__u32	key[8];
 	} buf;
 
@@ -851,7 +851,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
 }
 
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE])
+			  __u32 out[CHACHA20_BLOCK_WORDS])
 {
 	unsigned long v, flags;
 
@@ -867,7 +867,7 @@ static void _extract_crng(struct crng_state *crng,
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS])
 {
 	struct crng_state *crng = NULL;
 
@@ -885,7 +885,7 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 {
 	unsigned long	flags;
 	__u32		*s, *d;
@@ -897,14 +897,14 @@ static void _crng_backtrack_protect(struct crng_state *crng,
 		used = 0;
 	}
 	spin_lock_irqsave(&crng->lock, flags);
-	s = (__u32 *) &tmp[used];
+	s = &tmp[used / sizeof(__u32)];
 	d = &crng->state[4];
 	for (i=0; i < 8; i++)
 		*d++ ^= *s++;
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 {
 	struct crng_state *crng = NULL;
 
@@ -920,7 +920,7 @@ static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
 	ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE;
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u32 tmp[CHACHA20_BLOCK_WORDS];
 	int large_request = (nbytes > 256);
 
 	while (nbytes) {
@@ -1507,7 +1507,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller,
  */
 static void _get_random_bytes(void *buf, int nbytes)
 {
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u32 tmp[CHACHA20_BLOCK_WORDS];
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
@@ -2114,7 +2114,7 @@ u64 get_random_u64(void)
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
-		extract_crng((u8 *)batch->entropy_u64);
+		extract_crng((__u32 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
@@ -2144,7 +2144,7 @@ u32 get_random_u32(void)
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
-		extract_crng((u8 *)batch->entropy_u32);
+		extract_crng(batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];

+ 0 - 1
drivers/crypto/Kconfig

@@ -723,7 +723,6 @@ config CRYPTO_DEV_ARTPEC6
 	select CRYPTO_HASH
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
-	select CRYPTO_SHA384
 	select CRYPTO_SHA512
 	help
 	  Enables the driver for the on-chip crypto accelerator

+ 1 - 5
drivers/crypto/amcc/crypto4xx_alg.c

@@ -256,10 +256,6 @@ static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
 	if (is_ccm && !(req->iv[0] == 1 || req->iv[0] == 3))
 		return true;
 
-	/* CCM - fix CBC MAC mismatch in special case */
-	if (is_ccm && decrypt && !req->assoclen)
-		return true;
-
 	return false;
 }
 
@@ -330,7 +326,7 @@ int crypto4xx_setkey_aes_ccm(struct crypto_aead *cipher, const u8 *key,
 	sa = (struct dynamic_sa_ctl *) ctx->sa_in;
 	sa->sa_contents.w = SA_AES_CCM_CONTENTS | (keylen << 2);
 
-	set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV,
+	set_dynamic_sa_command_0(sa, SA_SAVE_HASH, SA_NOT_SAVE_IV,
 				 SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE,
 				 SA_NO_HEADER_PROC, SA_HASH_ALG_CBC_MAC,
 				 SA_CIPHER_ALG_AES,

+ 82 - 49
drivers/crypto/amcc/crypto4xx_core.c

@@ -128,7 +128,14 @@ static void crypto4xx_hw_init(struct crypto4xx_device *dev)
 	writel(PPC4XX_INT_DESCR_CNT, dev->ce_base + CRYPTO4XX_INT_DESCR_CNT);
 	writel(PPC4XX_INT_DESCR_CNT, dev->ce_base + CRYPTO4XX_INT_DESCR_CNT);
 	writel(PPC4XX_INT_CFG, dev->ce_base + CRYPTO4XX_INT_CFG);
-	writel(PPC4XX_PD_DONE_INT, dev->ce_base + CRYPTO4XX_INT_EN);
+	if (dev->is_revb) {
+		writel(PPC4XX_INT_TIMEOUT_CNT_REVB << 10,
+		       dev->ce_base + CRYPTO4XX_INT_TIMEOUT_CNT);
+		writel(PPC4XX_PD_DONE_INT | PPC4XX_TMO_ERR_INT,
+		       dev->ce_base + CRYPTO4XX_INT_EN);
+	} else {
+		writel(PPC4XX_PD_DONE_INT, dev->ce_base + CRYPTO4XX_INT_EN);
+	}
 }
 
 int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size)
@@ -275,14 +282,12 @@ static u32 crypto4xx_put_pd_to_pdr(struct crypto4xx_device *dev, u32 idx)
  */
 static u32 crypto4xx_build_gdr(struct crypto4xx_device *dev)
 {
-	dev->gdr = dma_alloc_coherent(dev->core_dev->device,
-				      sizeof(struct ce_gd) * PPC4XX_NUM_GD,
-				      &dev->gdr_pa, GFP_ATOMIC);
+	dev->gdr = dma_zalloc_coherent(dev->core_dev->device,
+				       sizeof(struct ce_gd) * PPC4XX_NUM_GD,
+				       &dev->gdr_pa, GFP_ATOMIC);
 	if (!dev->gdr)
 		return -ENOMEM;
 
-	memset(dev->gdr, 0, sizeof(struct ce_gd) * PPC4XX_NUM_GD);
-
 	return 0;
 }
 
@@ -570,15 +575,14 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
 				struct pd_uinfo *pd_uinfo,
 				struct ce_pd *pd)
 {
-	struct aead_request *aead_req;
-	struct crypto4xx_ctx *ctx;
+	struct aead_request *aead_req = container_of(pd_uinfo->async_req,
+		struct aead_request, base);
 	struct scatterlist *dst = pd_uinfo->dest_va;
+	size_t cp_len = crypto_aead_authsize(
+		crypto_aead_reqtfm(aead_req));
+	u32 icv[cp_len];
 	int err = 0;
 
-	aead_req = container_of(pd_uinfo->async_req, struct aead_request,
-				base);
-	ctx  = crypto_tfm_ctx(aead_req->base.tfm);
-
 	if (pd_uinfo->using_sd) {
 		crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo,
 					  pd->pd_ctl_len.bf.pkt_len,
@@ -590,38 +594,39 @@ static void crypto4xx_aead_done(struct crypto4xx_device *dev,
 
 	if (pd_uinfo->sa_va->sa_command_0.bf.dir == DIR_OUTBOUND) {
 		/* append icv at the end */
-		size_t cp_len = crypto_aead_authsize(
-			crypto_aead_reqtfm(aead_req));
-		u32 icv[cp_len];
-
 		crypto4xx_memcpy_from_le32(icv, pd_uinfo->sr_va->save_digest,
 					   cp_len);
 
 		scatterwalk_map_and_copy(icv, dst, aead_req->cryptlen,
 					 cp_len, 1);
+	} else {
+		/* check icv at the end */
+		scatterwalk_map_and_copy(icv, aead_req->src,
+			aead_req->assoclen + aead_req->cryptlen -
+			cp_len, cp_len, 0);
+
+		crypto4xx_memcpy_from_le32(icv, icv, cp_len);
+
+		if (crypto_memneq(icv, pd_uinfo->sr_va->save_digest, cp_len))
+			err = -EBADMSG;
 	}
 
 	crypto4xx_ret_sg_desc(dev, pd_uinfo);
 
 	if (pd->pd_ctl.bf.status & 0xff) {
-		if (pd->pd_ctl.bf.status & 0x1) {
-			/* authentication error */
-			err = -EBADMSG;
-		} else {
-			if (!__ratelimit(&dev->aead_ratelimit)) {
-				if (pd->pd_ctl.bf.status & 2)
-					pr_err("pad fail error\n");
-				if (pd->pd_ctl.bf.status & 4)
-					pr_err("seqnum fail\n");
-				if (pd->pd_ctl.bf.status & 8)
-					pr_err("error _notify\n");
-				pr_err("aead return err status = 0x%02x\n",
-					pd->pd_ctl.bf.status & 0xff);
-				pr_err("pd pad_ctl = 0x%08x\n",
-					pd->pd_ctl.bf.pd_pad_ctl);
-			}
-			err = -EINVAL;
+		if (!__ratelimit(&dev->aead_ratelimit)) {
+			if (pd->pd_ctl.bf.status & 2)
+				pr_err("pad fail error\n");
+			if (pd->pd_ctl.bf.status & 4)
+				pr_err("seqnum fail\n");
+			if (pd->pd_ctl.bf.status & 8)
+				pr_err("error _notify\n");
+			pr_err("aead return err status = 0x%02x\n",
+				pd->pd_ctl.bf.status & 0xff);
+			pr_err("pd pad_ctl = 0x%08x\n",
+				pd->pd_ctl.bf.pd_pad_ctl);
 		}
+		err = -EINVAL;
 	}
 
 	if (pd_uinfo->state & PD_ENTRY_BUSY)
@@ -1070,21 +1075,29 @@ static void crypto4xx_bh_tasklet_cb(unsigned long data)
 /**
  * Top Half of isr.
  */
-static irqreturn_t crypto4xx_ce_interrupt_handler(int irq, void *data)
+static inline irqreturn_t crypto4xx_interrupt_handler(int irq, void *data,
+						      u32 clr_val)
 {
 	struct device *dev = (struct device *)data;
 	struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev);
 
-	if (!core_dev->dev->ce_base)
-		return 0;
-
-	writel(PPC4XX_INTERRUPT_CLR,
-	       core_dev->dev->ce_base + CRYPTO4XX_INT_CLR);
+	writel(clr_val, core_dev->dev->ce_base + CRYPTO4XX_INT_CLR);
 	tasklet_schedule(&core_dev->tasklet);
 
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t crypto4xx_ce_interrupt_handler(int irq, void *data)
+{
+	return crypto4xx_interrupt_handler(irq, data, PPC4XX_INTERRUPT_CLR);
+}
+
+static irqreturn_t crypto4xx_ce_interrupt_handler_revb(int irq, void *data)
+{
+	return crypto4xx_interrupt_handler(irq, data, PPC4XX_INTERRUPT_CLR |
+		PPC4XX_TMO_ERR_INT);
+}
+
 /**
  * Supported Crypto Algorithms
  */
@@ -1266,6 +1279,8 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	struct resource res;
 	struct device *dev = &ofdev->dev;
 	struct crypto4xx_core_device *core_dev;
+	u32 pvr;
+	bool is_revb = true;
 
 	rc = of_address_to_resource(ofdev->dev.of_node, 0, &res);
 	if (rc)
@@ -1282,6 +1297,7 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 		       mfdcri(SDR0, PPC405EX_SDR0_SRST) | PPC405EX_CE_RESET);
 		mtdcri(SDR0, PPC405EX_SDR0_SRST,
 		       mfdcri(SDR0, PPC405EX_SDR0_SRST) & ~PPC405EX_CE_RESET);
+		is_revb = false;
 	} else if (of_find_compatible_node(NULL, NULL,
 			"amcc,ppc460sx-crypto")) {
 		mtdcri(SDR0, PPC460SX_SDR0_SRST,
@@ -1304,7 +1320,22 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	if (!core_dev->dev)
 		goto err_alloc_dev;
 
+	/*
+	 * Older version of 460EX/GT have a hardware bug.
+	 * Hence they do not support H/W based security intr coalescing
+	 */
+	pvr = mfspr(SPRN_PVR);
+	if (is_revb && ((pvr >> 4) == 0x130218A)) {
+		u32 min = PVR_MIN(pvr);
+
+		if (min < 4) {
+			dev_info(dev, "RevA detected - disable interrupt coalescing\n");
+			is_revb = false;
+		}
+	}
+
 	core_dev->dev->core_dev = core_dev;
+	core_dev->dev->is_revb = is_revb;
 	core_dev->device = dev;
 	spin_lock_init(&core_dev->lock);
 	INIT_LIST_HEAD(&core_dev->dev->alg_list);
@@ -1325,13 +1356,6 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	tasklet_init(&core_dev->tasklet, crypto4xx_bh_tasklet_cb,
 		     (unsigned long) dev);
 
-	/* Register for Crypto isr, Crypto Engine IRQ */
-	core_dev->irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-	rc = request_irq(core_dev->irq, crypto4xx_ce_interrupt_handler, 0,
-			 core_dev->dev->name, dev);
-	if (rc)
-		goto err_request_irq;
-
 	core_dev->dev->ce_base = of_iomap(ofdev->dev.of_node, 0);
 	if (!core_dev->dev->ce_base) {
 		dev_err(dev, "failed to of_iomap\n");
@@ -1339,6 +1363,15 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 		goto err_iomap;
 	}
 
+	/* Register for Crypto isr, Crypto Engine IRQ */
+	core_dev->irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
+	rc = request_irq(core_dev->irq, is_revb ?
+			 crypto4xx_ce_interrupt_handler_revb :
+			 crypto4xx_ce_interrupt_handler, 0,
+			 KBUILD_MODNAME, dev);
+	if (rc)
+		goto err_request_irq;
+
 	/* need to setup pdr, rdr, gdr and sdr before this */
 	crypto4xx_hw_init(core_dev->dev);
 
@@ -1352,11 +1385,11 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	return 0;
 
 err_start_dev:
-	iounmap(core_dev->dev->ce_base);
-err_iomap:
 	free_irq(core_dev->irq, dev);
 err_request_irq:
 	irq_dispose_mapping(core_dev->irq);
+	iounmap(core_dev->dev->ce_base);
+err_iomap:
 	tasklet_kill(&core_dev->tasklet);
 err_build_sdr:
 	crypto4xx_destroy_sdr(core_dev->dev);
@@ -1397,7 +1430,7 @@ MODULE_DEVICE_TABLE(of, crypto4xx_match);
 
 static struct platform_driver crypto4xx_driver = {
 	.driver = {
-		.name = MODULE_NAME,
+		.name = KBUILD_MODNAME,
 		.of_match_table = crypto4xx_match,
 	},
 	.probe		= crypto4xx_probe,

+ 1 - 3
drivers/crypto/amcc/crypto4xx_core.h

@@ -28,8 +28,6 @@
 #include "crypto4xx_reg_def.h"
 #include "crypto4xx_sa.h"
 
-#define MODULE_NAME "crypto4xx"
-
 #define PPC460SX_SDR0_SRST                      0x201
 #define PPC405EX_SDR0_SRST                      0x200
 #define PPC460EX_SDR0_SRST                      0x201
@@ -82,7 +80,6 @@ struct pd_uinfo {
 
 struct crypto4xx_device {
 	struct crypto4xx_core_device *core_dev;
-	char *name;
 	void __iomem *ce_base;
 	void __iomem *trng_base;
 
@@ -109,6 +106,7 @@ struct crypto4xx_device {
 	struct list_head alg_list;	/* List of algorithm supported
 					by this device */
 	struct ratelimit_state aead_ratelimit;
+	bool is_revb;
 };
 
 struct crypto4xx_core_device {

+ 3 - 1
drivers/crypto/amcc/crypto4xx_reg_def.h

@@ -121,13 +121,15 @@
 #define PPC4XX_PD_SIZE				6
 #define PPC4XX_CTX_DONE_INT			0x2000
 #define PPC4XX_PD_DONE_INT			0x8000
+#define PPC4XX_TMO_ERR_INT			0x40000
 #define PPC4XX_BYTE_ORDER			0x22222
 #define PPC4XX_INTERRUPT_CLR			0x3ffff
 #define PPC4XX_PRNG_CTRL_AUTO_EN		0x3
 #define PPC4XX_DC_3DES_EN			1
 #define PPC4XX_TRNG_EN				0x00020000
-#define PPC4XX_INT_DESCR_CNT			4
+#define PPC4XX_INT_DESCR_CNT			7
 #define PPC4XX_INT_TIMEOUT_CNT			0
+#define PPC4XX_INT_TIMEOUT_CNT_REVB		0x3FF
 #define PPC4XX_INT_CFG				1
 /**
  * all follow define are ad hoc

+ 1 - 1
drivers/crypto/amcc/crypto4xx_trng.c

@@ -92,7 +92,7 @@ void ppc4xx_trng_probe(struct crypto4xx_core_device *core_dev)
 	if (!rng)
 		goto err_out;
 
-	rng->name = MODULE_NAME;
+	rng->name = KBUILD_MODNAME;
 	rng->data_present = ppc4xx_trng_data_present;
 	rng->data_read = ppc4xx_trng_data_read;
 	rng->priv = (unsigned long) dev;

部分文件因为文件数量过多而无法显示