Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.14:

  API:
   - Defer scompress scratch buffer allocation to first use.
   - Add __crypto_xor that takes separate src and dst operands.
   - Add ahash multiple registration interface.
   - Revamped aead/skcipher algif code to fix async IO properly.

  Drivers:
   - Add non-SIMD fallback code path on ARM for SVE.
   - Add AMD Security Processor framework for ccp.
   - Add support for RSA in ccp.
   - Add XTS-AES-256 support for CCP version 5.
   - Add support for PRNG in sun4i-ss.
   - Add support for DPAA2 in caam.
   - Add ARTPEC crypto support.
   - Add Freescale RNGC hwrng support.
   - Add Microchip / Atmel ECC driver.
   - Add support for STM32 HASH module"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
  crypto: af_alg - get_page upon reassignment to TX SGL
  crypto: cavium/nitrox - Fix an error handling path in 'nitrox_probe()'
  crypto: inside-secure - fix an error handling path in safexcel_probe()
  crypto: rockchip - Don't dequeue the request when device is busy
  crypto: cavium - add release_firmware to all return case
  crypto: sahara - constify platform_device_id
  MAINTAINERS: Add ARTPEC crypto maintainer
  crypto: axis - add ARTPEC-6/7 crypto accelerator driver
  crypto: hash - add crypto_(un)register_ahashes()
  dt-bindings: crypto: add ARTPEC crypto
  crypto: algif_aead - fix comment regarding memory layout
  crypto: ccp - use dma_mapping_error to check map error
  lib/mpi: fix build with clang
  crypto: sahara - Remove leftover from previous used spinlock
  crypto: sahara - Fix dma unmap direction
  crypto: af_alg - consolidation of duplicate code
  crypto: caam - Remove unused dentry members
  crypto: ccp - select CONFIG_CRYPTO_RSA
  crypto: ccp - avoid uninitialized variable warning
  crypto: serpent - improve __serpent_setkey with UBSAN
  ...
Linus Torvalds 8 years ago
parent
commit
80cee03bf1
100 changed files with 9490 additions and 2865 deletions
  1. 16 0
      Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
  2. 13 0
      Documentation/devicetree/bindings/crypto/atmel-crypto.txt
  3. 30 0
      Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
  4. 21 0
      Documentation/devicetree/bindings/rng/imx-rngc.txt
  5. 7 0
      MAINTAINERS
  6. 3 2
      arch/arm/crypto/Kconfig
  7. 1 3
      arch/arm/crypto/aes-ce-glue.c
  8. 65 23
      arch/arm/crypto/aes-cipher-core.S
  9. 2 3
      arch/arm/crypto/aes-neonbs-glue.c
  10. 191 43
      arch/arm/crypto/ghash-ce-core.S
  11. 21 3
      arch/arm/crypto/ghash-ce-glue.c
  12. 16 6
      arch/arm64/crypto/Kconfig
  13. 15 15
      arch/arm64/crypto/aes-ce-ccm-core.S
  14. 139 35
      arch/arm64/crypto/aes-ce-ccm-glue.c
  15. 33 22
      arch/arm64/crypto/aes-ce-cipher.c
  16. 6 6
      arch/arm64/crypto/aes-ce.S
  17. 107 45
      arch/arm64/crypto/aes-cipher-core.S
  18. 53 0
      arch/arm64/crypto/aes-ctr-fallback.h
  19. 45 18
      arch/arm64/crypto/aes-glue.c
  20. 45 8
      arch/arm64/crypto/aes-neonbs-glue.c
  21. 3 2
      arch/arm64/crypto/chacha20-neon-glue.c
  22. 6 5
      arch/arm64/crypto/crc32-ce-glue.c
  23. 9 4
      arch/arm64/crypto/crct10dif-ce-glue.c
  24. 383 18
      arch/arm64/crypto/ghash-ce-core.S
  25. 482 35
      arch/arm64/crypto/ghash-ce-glue.c
  26. 14 4
      arch/arm64/crypto/sha1-ce-glue.c
  27. 26 4
      arch/arm64/crypto/sha2-ce-glue.c
  28. 1 0
      arch/arm64/crypto/sha256-glue.c
  29. 1 2
      arch/sparc/crypto/aes_glue.c
  30. 2 2
      arch/x86/crypto/aesni-intel_glue.c
  31. 1 2
      arch/x86/crypto/blowfish_glue.c
  32. 1 2
      arch/x86/crypto/cast5_avx_glue.c
  33. 1 2
      arch/x86/crypto/des3_ede_glue.c
  34. 2 0
      crypto/Kconfig
  35. 691 0
      crypto/af_alg.c
  36. 29 0
      crypto/ahash.c
  37. 15 10
      crypto/algapi.c
  38. 250 614
      crypto/algif_aead.c
  39. 132 702
      crypto/algif_skcipher.c
  40. 1 2
      crypto/ctr.c
  41. 33 18
      crypto/ecdh.c
  42. 4 8
      crypto/pcbc.c
  43. 4 2
      crypto/rng.c
  44. 22 33
      crypto/scompress.c
  45. 41 36
      crypto/serpent_generic.c
  46. 4 4
      crypto/tcrypt.c
  47. 16 4
      drivers/char/hw_random/Kconfig
  48. 1 0
      drivers/char/hw_random/Makefile
  49. 36 6
      drivers/char/hw_random/core.c
  50. 331 0
      drivers/char/hw_random/imx-rngc.c
  51. 46 3
      drivers/crypto/Kconfig
  52. 3 1
      drivers/crypto/Makefile
  53. 781 0
      drivers/crypto/atmel-ecc.c
  54. 128 0
      drivers/crypto/atmel-ecc.h
  55. 1 1
      drivers/crypto/atmel-sha.c
  56. 1 1
      drivers/crypto/atmel-tdes.c
  57. 1 0
      drivers/crypto/axis/Makefile
  58. 3192 0
      drivers/crypto/axis/artpec6_crypto.c
  59. 54 60
      drivers/crypto/bcm/cipher.c
  60. 7 6
      drivers/crypto/bcm/cipher.h
  61. 15 51
      drivers/crypto/caam/caamalg.c
  62. 2 3
      drivers/crypto/caam/caamalg_desc.c
  63. 47 8
      drivers/crypto/caam/caamalg_qi.c
  64. 3 4
      drivers/crypto/caam/caamhash.c
  65. 1 5
      drivers/crypto/caam/caamrng.c
  66. 59 68
      drivers/crypto/caam/ctrl.c
  67. 2 0
      drivers/crypto/caam/ctrl.h
  68. 40 0
      drivers/crypto/caam/error.c
  69. 4 0
      drivers/crypto/caam/error.h
  70. 0 11
      drivers/crypto/caam/intern.h
  71. 6 1
      drivers/crypto/caam/jr.c
  72. 12 18
      drivers/crypto/caam/qi.c
  73. 3 0
      drivers/crypto/caam/qi.h
  74. 1 0
      drivers/crypto/caam/regs.h
  75. 81 0
      drivers/crypto/caam/sg_sw_qm2.h
  76. 25 18
      drivers/crypto/caam/sg_sw_sec4.h
  77. 9 4
      drivers/crypto/cavium/cpt/cptpf_main.c
  78. 3 1
      drivers/crypto/cavium/nitrox/nitrox_main.c
  79. 15 7
      drivers/crypto/ccp/Kconfig
  80. 4 3
      drivers/crypto/ccp/Makefile
  81. 1 1
      drivers/crypto/ccp/ccp-crypto-aes-galois.c
  82. 53 43
      drivers/crypto/ccp/ccp-crypto-aes-xts.c
  83. 1 1
      drivers/crypto/ccp/ccp-crypto-des3.c
  84. 20 1
      drivers/crypto/ccp/ccp-crypto-main.c
  85. 299 0
      drivers/crypto/ccp/ccp-crypto-rsa.c
  86. 1 1
      drivers/crypto/ccp/ccp-crypto-sha.c
  87. 34 2
      drivers/crypto/ccp/ccp-crypto.h
  88. 10 5
      drivers/crypto/ccp/ccp-debugfs.c
  89. 13 7
      drivers/crypto/ccp/ccp-dev-v3.c
  90. 14 14
      drivers/crypto/ccp/ccp-dev-v5.c
  91. 84 50
      drivers/crypto/ccp/ccp-dev.c
  92. 9 21
      drivers/crypto/ccp/ccp-dev.h
  93. 1 1
      drivers/crypto/ccp/ccp-dmaengine.c
  94. 91 42
      drivers/crypto/ccp/ccp-ops.c
  95. 0 356
      drivers/crypto/ccp/ccp-pci.c
  96. 0 293
      drivers/crypto/ccp/ccp-platform.c
  97. 277 0
      drivers/crypto/ccp/sp-dev.c
  98. 133 0
      drivers/crypto/ccp/sp-dev.h
  99. 276 0
      drivers/crypto/ccp/sp-pci.c
  100. 256 0
      drivers/crypto/ccp/sp-platform.c

+ 16 - 0
Documentation/devicetree/bindings/crypto/artpec6-crypto.txt

@@ -0,0 +1,16 @@
+Axis crypto engine with PDMA interface.
+
+Required properties:
+- compatible : Should be one of the following strings:
+	"axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC
+	"axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC.
+- reg: Base address and size for the PDMA register area.
+- interrupts: Interrupt handle for the PDMA interrupt line.
+
+Example:
+
+crypto@f4264000 {
+	compatible = "axis,artpec6-crypto";
+	reg = <0xf4264000 0x1000>;
+	interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+};

+ 13 - 0
Documentation/devicetree/bindings/crypto/atmel-crypto.txt

@@ -66,3 +66,16 @@ sha@f8034000 {
 	dmas = <&dma1 2 17>;
 	dma-names = "tx";
 };
+
+* Eliptic Curve Cryptography (I2C)
+
+Required properties:
+- compatible : must be "atmel,atecc508a".
+- reg: I2C bus address of the device.
+- clock-frequency: must be present in the i2c controller node.
+
+Example:
+atecc508a@C0 {
+	compatible = "atmel,atecc508a";
+	reg = <0xC0>;
+};

+ 30 - 0
Documentation/devicetree/bindings/crypto/st,stm32-hash.txt

@@ -0,0 +1,30 @@
+* STMicroelectronics STM32 HASH
+
+Required properties:
+- compatible: Should contain entries for this and backward compatible
+  HASH versions:
+  - "st,stm32f456-hash" for stm32 F456.
+  - "st,stm32f756-hash" for stm32 F756.
+- reg: The address and length of the peripheral registers space
+- interrupts: the interrupt specifier for the HASH
+- clocks: The input clock of the HASH instance
+
+Optional properties:
+- resets: The input reset of the HASH instance
+- dmas: DMA specifiers for the HASH. See the DMA client binding,
+	 Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request name. Should be "in" if a dma is present.
+- dma-maxburst: Set number of maximum dma burst supported
+
+Example:
+
+hash1: hash@50060400 {
+	compatible = "st,stm32f756-hash";
+	reg = <0x50060400 0x400>;
+	interrupts = <80>;
+	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>;
+	resets = <&rcc STM32F7_AHB2_RESET(HASH)>;
+	dmas = <&dma2 7 2 0x400 0x0>;
+	dma-names = "in";
+	dma-maxburst = <0>;
+};

+ 21 - 0
Documentation/devicetree/bindings/rng/imx-rngc.txt

@@ -0,0 +1,21 @@
+Freescale RNGC (Random Number Generator Version C)
+
+The driver also supports version B, which is mostly compatible
+to version C.
+
+Required properties:
+- compatible : should be one of
+               "fsl,imx25-rngb"
+               "fsl,imx35-rngc"
+- reg : offset and length of the register set of this block
+- interrupts : the interrupt number for the RNGC block
+- clocks : the RNGC clk source
+
+Example:
+
+rng@53fb0000 {
+	compatible = "fsl,imx25-rngb";
+	reg = <0x53fb0000 0x4000>;
+	interrupts = <22>;
+	clocks = <&trng_clk>;
+};

+ 7 - 0
MAINTAINERS

@@ -1162,6 +1162,7 @@ L:	linux-arm-kernel@axis.com
 F:	arch/arm/mach-artpec
 F:	arch/arm/boot/dts/artpec6*
 F:	drivers/clk/axis
+F:	drivers/crypto/axis
 F:	drivers/pinctrl/pinctrl-artpec*
 F:	Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt

@@ -8770,6 +8771,12 @@ F:	drivers/dma/at_hdmac.c
 F:	drivers/dma/at_hdmac_regs.h
 F:	include/linux/platform_data/dma-atmel.h

+MICROCHIP / ATMEL ECC DRIVER
+M:	Tudor Ambarus <tudor.ambarus@microchip.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/atmel-ecc.*
+
 MICROCHIP / ATMEL ISC DRIVER
 M:	Songjun Wu <songjun.wu@microchip.com>
 L:	linux-media@vger.kernel.org

+ 3 - 2
arch/arm/crypto/Kconfig

@@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE
 	  ARMv8 Crypto Extensions

 config CRYPTO_GHASH_ARM_CE
-	tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+	tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
 	help
 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
-	  that is part of the ARMv8 Crypto Extensions
+	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
+	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
 
 config CRYPTO_CRCT10DIF_ARM_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"

+ 1 - 3
arch/arm/crypto/aes-ce-glue.c

@@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req)

 		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
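
The hunk above is one instance of the new __crypto_xor interface called out in the pull description: a partial-block memcpy() followed by an in-place crypto_xor() collapses into a single crypto_xor_cpy() call that takes separate source and destination operands. A minimal sketch of the before/after pattern (the wrapper function names here are illustrative, not from the tree):

	#include <crypto/algapi.h>	/* crypto_xor(), crypto_xor_cpy() */
	#include <linux/string.h>

	/* Old pattern: copy the partial source block to the destination,
	 * then XOR the keystream into it in place. */
	static void ctr_xor_tail_old(u8 *dst, const u8 *src, const u8 *keystream,
				     unsigned int nbytes)
	{
		if (dst != src)
			memcpy(dst, src, nbytes);
		crypto_xor(dst, keystream, nbytes);
	}

	/* New pattern: crypto_xor_cpy() reads src, XORs in the keystream and
	 * writes dst in one pass, so the memcpy() disappears. */
	static void ctr_xor_tail_new(u8 *dst, const u8 *src, const u8 *keystream,
				     unsigned int nbytes)
	{
		crypto_xor_cpy(dst, src, keystream, nbytes);
	}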

+ 65 - 23
arch/arm/crypto/aes-cipher-core.S

@@ -10,6 +10,7 @@
  */

 #include <linux/linkage.h>
+#include <asm/cache.h>

 	.text
 	.align		5
@@ -32,19 +33,19 @@
 	.endif
 	.endm

-	.macro		__load, out, in, idx
+	.macro		__load, out, in, idx, sz, op
 	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
-	ldr		\out, [ttab, \in, lsr #(8 * \idx) - 2]
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
 	.else
-	ldr		\out, [ttab, \in, lsl #2]
+	ldr\op		\out, [ttab, \in, lsl #\sz]
 	.endif
 	.endm

-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
 	__select	\out0, \in0, 0
 	__select	t0, \in1, 1
-	__load		\out0, \out0, 0
-	__load		t0, t0, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
 
 
 	.if		\enc
 	__select	\out1, \in1, 0
@@ -53,10 +54,10 @@
 	__select	\out1, \in3, 0
 	__select	t1, \in0, 1
 	.endif
-	__load		\out1, \out1, 0
+	__load		\out1, \out1, 0, \sz, \op
 	__select	t2, \in2, 2
-	__load		t1, t1, 1
-	__load		t2, t2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
 
 
 	eor		\out0, \out0, t0, ror #24

@@ -68,9 +69,9 @@
 	__select	\t3, \in1, 2
 	__select	\t4, \in2, 3
 	.endif
-	__load		\t3, \t3, 2
-	__load		t0, t0, 3
-	__load		\t4, \t4, 3
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
 
 
 	eor		\out1, \out1, t1, ror #24
 	eor		\out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@
 	eor		\out1, \out1, t2
 	.endm

-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm

-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm

 	.macro		__rev, out, in
@@ -114,7 +115,7 @@
 	.endif
 	.endm

-	.macro		do_crypt, round, ttab, ltab
+	.macro		do_crypt, round, ttab, ltab, bsz
 	push		{r3-r11, lr}

 	ldr		r4, [in]
@@ -146,9 +147,12 @@

 1:	subs		rounds, rounds, #4
 	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	__adrl		ttab, \ltab, ls
+	bls		2f
 	\round		r4, r5, r6, r7, r8, r9, r10, r11
-	bhi		0b
+	b		0b
+
+2:	__adrl		ttab, \ltab
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
 
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
@@ -170,10 +174,48 @@
 	.ltorg
 	.endm

+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
 ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm_encrypt)

+	.align		5
 ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)

+ 2 - 3
arch/arm/crypto/aes-neonbs-glue.c

@@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;

-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 
 			err = skcipher_walk_done(&walk, 0);
 			break;

+ 191 - 43
arch/arm/crypto/ghash-ce-core.S

@@ -1,7 +1,7 @@
 /*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -12,40 +12,162 @@
 #include <asm/assembler.h>

 	SHASH		.req	q0
-	SHASH2		.req	q1
-	T1		.req	q2
-	T2		.req	q3
-	MASK		.req	q4
-	XL		.req	q5
-	XM		.req	q6
-	XH		.req	q7
-	IN1		.req	q7
+	T1		.req	q1
+	XL		.req	q2
+	XM		.req	q3
+	XH		.req	q4
+	IN1		.req	q4
 
 
 	SHASH_L		.req	d0
 	SHASH_H		.req	d1
-	SHASH2_L	.req	d2
-	T1_L		.req	d4
-	MASK_L		.req	d8
-	XL_L		.req	d10
-	XL_H		.req	d11
-	XM_L		.req	d12
-	XM_H		.req	d13
-	XH_L		.req	d14
+	T1_L		.req	d2
+	T1_H		.req	d3
+	XL_L		.req	d4
+	XL_H		.req	d5
+	XM_L		.req	d6
+	XM_H		.req	d7
+	XH_L		.req	d8
+
+	t0l		.req	d10
+	t0h		.req	d11
+	t1l		.req	d12
+	t1h		.req	d13
+	t2l		.req	d14
+	t2h		.req	d15
+	t3l		.req	d16
+	t3h		.req	d17
+	t4l		.req	d18
+	t4h		.req	d19
+
+	t0q		.req	q5
+	t1q		.req	q6
+	t2q		.req	q7
+	t3q		.req	q8
+	t4q		.req	q9
+	T2		.req	q9
+
+	s1l		.req	d20
+	s1h		.req	d21
+	s2l		.req	d22
+	s2h		.req	d23
+	s3l		.req	d24
+	s3h		.req	d25
+	s4l		.req	d26
+	s4h		.req	d27
+
+	MASK		.req	d28
+	SHASH2_p8	.req	d28
+
+	k16		.req	d29
+	k32		.req	d30
+	k48		.req	d31
+	SHASH2_p64	.req	d31
 
 
 	.text
 	.fpu		crypto-neon-fp-armv8

+	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
+	vmull.p64	\rd, \rn, \rm
+	.endm
+
 	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
+	 * This implementation of 64x64 -> 128 bit polynomial multiplication
+	 * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+	 * "Fast Software Polynomial Multiplication on ARM Processors Using
+	 * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+	 * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
+	 *
+	 * It has been slightly tweaked for in-order performance, and to allow
+	 * 'rq' to overlap with 'ad' or 'bd'.
 	 */
 	 */
-ENTRY(pmull_ghash_update)
-	vld1.64		{SHASH}, [r3]
+	.macro		__pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
+	vext.8		t0l, \ad, \ad, #1	@ A1
+	.ifc		\b1, t4l
+	vext.8		t4l, \bd, \bd, #1	@ B1
+	.endif
+	vmull.p8	t0q, t0l, \bd		@ F = A1*B
+	vext.8		t1l, \ad, \ad, #2	@ A2
+	vmull.p8	t4q, \ad, \b1		@ E = A*B1
+	.ifc		\b2, t3l
+	vext.8		t3l, \bd, \bd, #2	@ B2
+	.endif
+	vmull.p8	t1q, t1l, \bd		@ H = A2*B
+	vext.8		t2l, \ad, \ad, #3	@ A3
+	vmull.p8	t3q, \ad, \b2		@ G = A*B2
+	veor		t0q, t0q, t4q		@ L = E + F
+	.ifc		\b3, t4l
+	vext.8		t4l, \bd, \bd, #3	@ B3
+	.endif
+	vmull.p8	t2q, t2l, \bd		@ J = A3*B
+	veor		t0l, t0l, t0h		@ t0 = (L) (P0 + P1) << 8
+	veor		t1q, t1q, t3q		@ M = G + H
+	.ifc		\b4, t3l
+	vext.8		t3l, \bd, \bd, #4	@ B4
+	.endif
+	vmull.p8	t4q, \ad, \b3		@ I = A*B3
+	veor		t1l, t1l, t1h		@ t1 = (M) (P2 + P3) << 16
+	vmull.p8	t3q, \ad, \b4		@ K = A*B4
+	vand		t0h, t0h, k48
+	vand		t1h, t1h, k32
+	veor		t2q, t2q, t4q		@ N = I + J
+	veor		t0l, t0l, t0h
+	veor		t1l, t1l, t1h
+	veor		t2l, t2l, t2h		@ t2 = (N) (P4 + P5) << 24
+	vand		t2h, t2h, k16
+	veor		t3l, t3l, t3h		@ t3 = (K) (P6 + P7) << 32
+	vmov.i64	t3h, #0
+	vext.8		t0q, t0q, t0q, #15
+	veor		t2l, t2l, t2h
+	vext.8		t1q, t1q, t1q, #14
+	vmull.p8	\rq, \ad, \bd		@ D = A*B
+	vext.8		t2q, t2q, t2q, #13
+	vext.8		t3q, t3q, t3q, #12
+	veor		t0q, t0q, t1q
+	veor		t2q, t2q, t3q
+	veor		\rq, \rq, t0q
+	veor		\rq, \rq, t2q
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	vmull.p64	T1, XL_L, MASK
+
+	veor		XH_L, XH_L, XM_H
+	vext.8		T1, T1, T1, #8
+	veor		XL_H, XL_H, XM_L
+	veor		T1, T1, XL
+
+	vmull.p64	XL, T1_H, MASK
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	veor		XL_H, XL_H, XM_L
+	veor		XH_L, XH_L, XM_H
+
+	vshl.i64	T1, XL, #57
+	vshl.i64	T2, XL, #62
+	veor		T1, T1, T2
+	vshl.i64	T2, XL, #63
+	veor		T1, T1, T2
+	veor		XL_H, XL_H, T1_L
+	veor		XH_L, XH_L, T1_H
+
+	vshr.u64	T1, XL, #1
+	veor		XH, XH, XL
+	veor		XL, XL, T1
+	vshr.u64	T1, T1, #6
+	vshr.u64	XL, XL, #1
+	.endm
+
+	.macro		ghash_update, pn
 	vld1.64		{XL}, [r1]
-	vmov.i8		MASK, #0xe1
-	vext.8		SHASH2, SHASH, SHASH, #8
-	vshl.u64	MASK, MASK, #57
-	veor		SHASH2, SHASH2, SHASH
 
 
 	/* do the head block first, if supplied */
 	ldr		ip, [sp]
@@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update)
 #ifndef CONFIG_CPU_BIG_ENDIAN
 	vrev64.8	T1, T1
 #endif
-	vext.8		T2, XL, XL, #8
 	vext.8		IN1, T1, T1, #8
-	veor		T1, T1, T2
+	veor		T1_L, T1_L, XL_H
 	veor		XL, XL, IN1

-	vmull.p64	XH, SHASH_H, XL_H		@ a1 * b1
+	__pmull_\pn	XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h	@ a1 * b1
 	veor		T1, T1, XL
-	vmull.p64	XL, SHASH_L, XL_L		@ a0 * b0
-	vmull.p64	XM, SHASH2_L, T1_L		@ (a1 + a0)(b1 + b0)
+	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
+	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
 
 
-	vext.8		T1, XL, XH, #8
-	veor		T2, XL, XH
+	veor		T1, XL, XH
 	veor		XM, XM, T1
 	veor		XM, XM, T1
-	vmull.p64	T2, XL_L, MASK_L
 
 
-	vmov		XH_L, XM_H
-	vmov		XM_H, XL_L
+	__pmull_reduce_\pn
 
 
-	veor		XL, XM, T2
-	vext.8		T2, XL, XL, #8
-	vmull.p64	XL, XL_L, MASK_L
-	veor		T2, T2, XH
-	veor		XL, XL, T2
+	veor		T1, T1, XH
+	veor		XL, XL, T1
 
 
 	bne		0b
 	bne		0b

 	vst1.64		{XL}, [r1]
 	bx		lr
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p64, SHASH_L, SHASH_H
+
+	vmov.i8		MASK, #0xe1
+	vshl.u64	MASK, MASK, #57
+
+	ghash_update	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p8, SHASH_L, SHASH_H
+
+	vext.8		s1l, SHASH_L, SHASH_L, #1
+	vext.8		s2l, SHASH_L, SHASH_L, #2
+	vext.8		s3l, SHASH_L, SHASH_L, #3
+	vext.8		s4l, SHASH_L, SHASH_L, #4
+	vext.8		s1h, SHASH_H, SHASH_H, #1
+	vext.8		s2h, SHASH_H, SHASH_H, #2
+	vext.8		s3h, SHASH_H, SHASH_H, #3
+	vext.8		s4h, SHASH_H, SHASH_H, #4
+
+	vmov.i64	k16, #0xffff
+	vmov.i64	k32, #0xffffffff
+	vmov.i64	k48, #0xffffffffffff
+
+	ghash_update	p8
+ENDPROC(pmull_ghash_update_p8)

+ 21 - 3
arch/arm/crypto/ghash-ce-glue.c

@@ -22,6 +22,7 @@
 MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");

 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -41,8 +42,17 @@ struct ghash_async_ctx {
 	struct cryptd_ahash *cryptd_tfm;
 };

-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
 
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void)
 {
 	int err;

+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	if (elf_hwcap2 & HWCAP2_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
 		return err;
@@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }

-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);

+ 16 - 6
arch/arm64/crypto/Kconfig

@@ -18,18 +18,23 @@ config CRYPTO_SHA512_ARM64

 config CRYPTO_SHA1_ARM64_CE
 	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA1
 
 
 config CRYPTO_SHA2_ARM64_CE
 	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA256_ARM64
 
 
 config CRYPTO_GHASH_ARM64_CE
-	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_GF128MUL
+	select CRYPTO_AES
+	select CRYPTO_AES_ARM64
 
 
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -49,25 +54,29 @@ config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM64
 
 
 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AEAD

 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD

 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AES
 	select CRYPTO_SIMD

@@ -82,6 +91,7 @@ config CRYPTO_AES_ARM64_BS
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD

 endif

+ 15 - 15
arch/arm64/crypto/aes-ce-ccm-core.S

@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.16b}, [x4]			/* load first round key */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
+4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
+5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.16b}, [x6], #16		/* load next round key */
+	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
+1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
+2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
+3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.16b}, [x3]			/* load first round key */
+	ld1	{v3.4s}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
+3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
+4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.16b}, [x10], #16		/* load next round key */
+	ld1	{v5.4s}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b

+ 139 - 35
arch/arm64/crypto/aes-ce-ccm-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  */
  */

 #include <asm/neon.h>
 #include <asm/unaligned.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 				 u32 rounds);

+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
 	return 0;
 	return 0;
 }

+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+			   u32 abytes, u32 *macp, bool use_neon)
+{
+	if (likely(use_neon)) {
+		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+				     num_rounds(key));
+	} else {
+		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+			int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+			crypto_xor(&mac[*macp], in, added);
+
+			*macp += added;
+			in += added;
+			abytes -= added;
+		}
+
+		while (abytes > AES_BLOCK_SIZE) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+			in += AES_BLOCK_SIZE;
+			abytes -= AES_BLOCK_SIZE;
+		}
+
+		if (abytes > 0) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, abytes);
+			*macp = abytes;
+		} else {
+			*macp = 0;
+		}
+	}
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+				   bool use_neon)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 		ltag.len = 6;
 	}

-	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-			     num_rounds(ctx));
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
 	scatterwalk_start(&walk, req->src);

 	do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-				     num_rounds(ctx));
+		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
 		len -= n;

 		scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 	} while (len);
 }

+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+			      struct crypto_aes_ctx *ctx, bool enc)
+{
+	u8 buf[AES_BLOCK_SIZE];
+	int err = 0;
+
+	while (walk->nbytes) {
+		int blocks = walk->nbytes / AES_BLOCK_SIZE;
+		u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+		u8 *dst = walk->dst.virt.addr;
+		u8 *src = walk->src.virt.addr;
+		u32 nbytes = walk->nbytes;
+
+		if (nbytes == walk->total && tail > 0) {
+			blocks++;
+			tail = 0;
+		}
+
+		do {
+			u32 bsize = AES_BLOCK_SIZE;
+
+			if (nbytes < AES_BLOCK_SIZE)
+				bsize = nbytes;
+
+			crypto_inc(walk->iv, AES_BLOCK_SIZE);
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+					    num_rounds(ctx));
+			__aes_arm64_encrypt(ctx->key_enc, mac, mac,
+					    num_rounds(ctx));
+			if (enc)
+				crypto_xor(mac, src, bsize);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			if (!enc)
+				crypto_xor(mac, dst, bsize);
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		} while (--blocks);
+
+		err = skcipher_walk_done(walk, tail);
+	}
+
+	if (!err) {
+		__aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+		__aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+		crypto_xor(mac, buf, AES_BLOCK_SIZE);
+	}
+	return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
+	bool use_neon = may_use_simd();
 	int err;

 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;

-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);

 	err = skcipher_walk_aead_encrypt(&walk, req, true);

-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-		if (walk.nbytes == walk.total)
-			tail = 0;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 
-		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			ce_aes_ccm_encrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
 
-	kernel_neon_end();
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
 
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+	}
 	if (err)
 		return err;

@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
+	bool use_neon = may_use_simd();
 	int err;

 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;

-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);

 	err = skcipher_walk_aead_decrypt(&walk, req, true);

-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
 
-		if (walk.nbytes == walk.total)
-			tail = 0;
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
 
-		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			ce_aes_ccm_decrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
 
-	kernel_neon_end();
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+	}
 
 
 	if (err)
 		return err;

+ 33 - 22
arch/arm64/crypto/aes-ce-cipher.c

@@ -1,7 +1,7 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,8 @@
  */

 #include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -20,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");

+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
@@ -44,27 +49,32 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;

-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -89,27 +99,32 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;

-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -165,20 +180,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;

-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
 
-	kernel_neon_begin_partial(2);
+	kernel_neon_begin();
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;

-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-			 rki[0];
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -210,9 +221,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,

 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.16b}, %[in]		;"
+		__asm__("ld1	{v0.4s}, %[in]		;"
 			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.16b}, %[out]	;"
+			"st1	{v1.4s}, %[out]	;"

 		:	[out]	"=Q"(key_dec[i])
 		:	[in]	"Q"(key_enc[j])
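
The aes-ce-cipher.c hunks above show the shape of the non-SIMD fallback that several of these arm64 patches introduce: check may_use_simd() first, and only bracket the work with kernel_neon_begin()/kernel_neon_end() when the NEON unit is actually usable, otherwise call a scalar routine. A stripped-down sketch of that pattern (the context struct and helper names below are hypothetical, not from the tree):

	#include <asm/neon.h>		/* kernel_neon_begin(), kernel_neon_end() */
	#include <asm/simd.h>		/* may_use_simd() */
	#include <linux/types.h>

	struct my_ctx;						/* hypothetical context */
	void my_scalar_transform(struct my_ctx *ctx, u8 *dst, const u8 *src);	/* hypothetical */
	void my_neon_transform(struct my_ctx *ctx, u8 *dst, const u8 *src);	/* hypothetical */

	static void my_transform(struct my_ctx *ctx, u8 *dst, const u8 *src)
	{
		if (!may_use_simd()) {
			/* e.g. called from a context where NEON is off limits */
			my_scalar_transform(ctx, dst, src);
			return;
		}

		kernel_neon_begin();
		my_neon_transform(ctx, dst, src);
		kernel_neon_end();
	}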

+ 6 - 6
arch/arm64/crypto/aes-ce.S

@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  *                                    Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -22,11 +22,11 @@
 	cmp		\rounds, #12
 	blo		2222f		/* 128 bits */
 	beq		1111f		/* 192 bits */
-	ld1		{v17.16b-v18.16b}, [\rk], #32
-1111:	ld1		{v19.16b-v20.16b}, [\rk], #32
-2222:	ld1		{v21.16b-v24.16b}, [\rk], #64
-	ld1		{v25.16b-v28.16b}, [\rk], #64
-	ld1		{v29.16b-v31.16b}, [\rk]
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
 	.endm

 	/* prepare for encryption with key in rk[] */

+ 107 - 45
arch/arm64/crypto/aes-cipher-core.S

@@ -10,6 +10,7 @@

 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>

 	.text

@@ -17,94 +18,155 @@
 	out		.req	x1
 	in		.req	x2
 	rounds		.req	x3
-	tt		.req	x4
-	lt		.req	x2
+	tt		.req	x2
 
 
-	.macro		__pair, enc, reg0, reg1, in0, in1e, in1d, shift
+	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	.ifc		\op\shift, b0
+	ubfiz		\reg0, \in0, #2, #8
+	ubfiz		\reg1, \in1e, #2, #8
+	.else
 	ubfx		\reg0, \in0, #\shift, #8
-	.if		\enc
 	ubfx		\reg1, \in1e, #\shift, #8
-	.else
-	ubfx		\reg1, \in1d, #\shift, #8
 	.endif
+
+	/*
+	 * AArch64 cannot do byte size indexed loads from a table containing
+	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+	 * valid instruction. So perform the shift explicitly first for the
+	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
+	 * than ubfx above)
+	 */
+	.ifnc		\op, b
 	ldr		\reg0, [tt, \reg0, uxtw #2]
 	ldr		\reg1, [tt, \reg1, uxtw #2]
+	.else
+	.if		\shift > 0
+	lsl		\reg0, \reg0, #2
+	lsl		\reg1, \reg1, #2
+	.endif
+	ldrb		\reg0, [tt, \reg0, uxtw]
+	ldrb		\reg1, [tt, \reg1, uxtw]
+	.endif
 	.endm

-	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	ubfx		\reg0, \in0, #\shift, #8
+	ubfx		\reg1, \in1d, #\shift, #8
+	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
+	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
 	ldp		\out0, \out1, [rk], #8

-	__pair		\enc, w13, w14, \in0, \in1, \in3, 0
-	__pair		\enc, w15, w16, \in1, \in2, \in0, 8
-	__pair		\enc, w17, w18, \in2, \in3, \in1, 16
-	__pair		\enc, \t0, \t1, \in3, \in0, \in2, 24
-
-	eor		\out0, \out0, w13
-	eor		\out1, \out1, w14
-	eor		\out0, \out0, w15, ror #24
-	eor		\out1, \out1, w16, ror #24
-	eor		\out0, \out0, w17, ror #16
-	eor		\out1, \out1, w18, ror #16
+	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
+	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
+	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
+	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+	eor		\out0, \out0, w12
+	eor		\out1, \out1, w13
+	eor		\out0, \out0, w14, ror #24
+	eor		\out1, \out1, w15, ror #24
+	eor		\out0, \out0, w16, ror #16
+	eor		\out1, \out1, w17, ror #16
 	eor		\out0, \out0, \t0, ror #8
 	eor		\out1, \out1, \t1, ror #8
 	.endm

-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 	.endm
 
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 	.endm
 
 
-	.macro		do_crypt, round, ttab, ltab
-	ldp		w5, w6, [in]
-	ldp		w7, w8, [in, #8]
-	ldp		w9, w10, [rk], #16
-	ldp		w11, w12, [rk, #-8]
+	.macro		do_crypt, round, ttab, ltab, bsz
+	ldp		w4, w5, [in]
+	ldp		w6, w7, [in, #8]
+	ldp		w8, w9, [rk], #16
+	ldp		w10, w11, [rk, #-8]
 
 
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
 
+	eor		w4, w4, w8
 	eor		w5, w5, w9
 	eor		w5, w5, w9
 	eor		w6, w6, w10
 	eor		w6, w6, w10
 	eor		w7, w7, w11
 	eor		w7, w7, w11
-	eor		w8, w8, w12
 
 
 	adr_l		tt, \ttab
 	adr_l		tt, \ttab
-	adr_l		lt, \ltab
 
 
 	tbnz		rounds, #1, 1f
 	tbnz		rounds, #1, 1f
 
 
-0:	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
+0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	\round		w4, w5, w6, w7, w8, w9, w10, w11
 
 
 1:	subs		rounds, rounds, #4
 1:	subs		rounds, rounds, #4
-	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	csel		tt, tt, lt, hi
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
-	b.hi		0b
-
+	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	b.ls		3f
+2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
+	b		0b
+3:	adr_l		tt, \ltab
+	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
 
-	stp		w5, w6, [out]
-	stp		w7, w8, [out, #8]
+	stp		w4, w5, [out]
+	stp		w6, w7, [out, #8]
 	ret
 	ret
 	.endm
 	.endm
 
 
-	.align		5
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm64_inverse_sbox, %object
+__aes_arm64_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
+
 ENTRY(__aes_arm64_encrypt)
 ENTRY(__aes_arm64_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm64_encrypt)
 ENDPROC(__aes_arm64_encrypt)
 
 
 	.align		5
 	.align		5
 ENTRY(__aes_arm64_decrypt)
 ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
 ENDPROC(__aes_arm64_decrypt)
 ENDPROC(__aes_arm64_decrypt)
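
As the comment in __pair1 explains, the inner rounds index tables of 32-bit words, so the byte pulled out of each column must be scaled by four before the load, whereas the final round now goes through the plain 256-byte __aes_arm64_inverse_sbox with an unscaled byte load. A standalone C sketch of the two lookup shapes (the table names and values below are invented for illustration):

/*
 * Inner rounds: 256 x 32-bit table, index scaled by 4 (the 'uxtw #2').
 * Final round:  256 x 8-bit S-box, no scaling needed.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t word_lookup(const uint32_t tab[256], uint32_t col, int shift)
{
        return tab[(col >> shift) & 0xff];      /* compiler scales index by 4 */
}

static uint8_t byte_lookup(const uint8_t sbox[256], uint32_t col, int shift)
{
        return sbox[(col >> shift) & 0xff];     /* plain byte load */
}

int main(void)
{
        uint32_t tab[256] = { [0xab] = 0x11223344 };
        uint8_t sbox[256] = { [0xab] = 0x52 };
        uint32_t col = 0xab00;                  /* byte 0xab in bits 8..15 */

        printf("%08x %02x\n", (unsigned)word_lookup(tab, col, 8),
               byte_lookup(sbox, col, 8));
        return 0;
}

Using the byte-wide table for the last round is also what allows crypto_il_tab to be dropped in favour of the much smaller, cache-aligned inverse S-box defined above.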

+ 53 - 0
arch/arm64/crypto/aes-ctr-fallback.h

@@ -0,0 +1,53 @@
+/*
+ * Fallback for sync aes(ctr) in contexts where kernel mode NEON
+ * is not allowed
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
+					   struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+			tail = walk.nbytes % AES_BLOCK_SIZE;
+		}
+
+		do {
+			int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
+					    6 + ctx->key_length / 4);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+			dst += AES_BLOCK_SIZE;
+			src += AES_BLOCK_SIZE;
+			nbytes -= AES_BLOCK_SIZE;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
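
The fallback above walks the request and builds the keystream one block at a time with the scalar AES core, XORs it into the data with crypto_xor_cpy() and bumps the big-endian counter with crypto_inc(). The loop shape is easier to see in a freestanding model; block_encrypt below is a placeholder rather than __aes_arm64_encrypt, and the skcipher walk handling is left out:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define BLOCK_SIZE 16

static void block_encrypt(uint8_t out[BLOCK_SIZE], const uint8_t in[BLOCK_SIZE])
{
        for (int i = 0; i < BLOCK_SIZE; i++)    /* placeholder cipher */
                out[i] = in[i] ^ 0xa5;
}

static void ctr_inc(uint8_t ctr[BLOCK_SIZE])
{
        /* big-endian increment, what crypto_inc() does */
        for (int i = BLOCK_SIZE - 1; i >= 0 && ++ctr[i] == 0; i--)
                ;
}

static void ctr_crypt(uint8_t *dst, const uint8_t *src, size_t len,
                      uint8_t ctr[BLOCK_SIZE])
{
        uint8_t ks[BLOCK_SIZE];

        while (len > 0) {
                size_t n = len < BLOCK_SIZE ? len : BLOCK_SIZE;

                block_encrypt(ks, ctr);         /* keystream = E(counter) */
                for (size_t i = 0; i < n; i++)  /* crypto_xor_cpy() */
                        dst[i] = src[i] ^ ks[i];
                ctr_inc(ctr);

                dst += n;
                src += n;
                len -= n;
        }
}

int main(void)
{
        uint8_t ctr[BLOCK_SIZE] = { 0 }, ctr2[BLOCK_SIZE] = { 0 };
        uint8_t msg[22] = "attack at dawn at six", out[22], back[22];

        ctr_crypt(out, msg, sizeof(msg), ctr);
        ctr_crypt(back, out, sizeof(back), ctr2);       /* CTR decrypt == encrypt */
        return memcmp(msg, back, sizeof(msg)) != 0;
}

Because CTR decryption is the same keystream XOR, the glue files below simply point both .encrypt and .decrypt at the same sync wrapper.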

+ 45 - 18
arch/arm64/crypto/aes-glue.c

@@ -10,6 +10,7 @@
 
 
 #include <asm/neon.h>
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <asm/hwcap.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/simd.h>
@@ -19,6 +20,7 @@
 #include <crypto/xts.h>
 #include <crypto/xts.h>
 
 
 #include "aes-ce-setkey.h"
 #include "aes-ce-setkey.h"
+#include "aes-ctr-fallback.h"
 
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
 #define MODE			"ce"
@@ -241,9 +243,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
 				blocks, walk.iv, first);
 				blocks, walk.iv, first);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	}
 	kernel_neon_end();
 	kernel_neon_end();
@@ -251,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req)
 	return err;
 	return err;
 }
 }
 
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(ctx, req);
+
+	return ctr_encrypt(req);
+}
+
 static int xts_encrypt(struct skcipher_request *req)
 static int xts_encrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -357,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { {
 	.ivsize		= AES_BLOCK_SIZE,
 	.ivsize		= AES_BLOCK_SIZE,
 	.chunksize	= AES_BLOCK_SIZE,
 	.chunksize	= AES_BLOCK_SIZE,
 	.setkey		= skcipher_aes_setkey,
 	.setkey		= skcipher_aes_setkey,
-	.encrypt	= ctr_encrypt,
-	.decrypt	= ctr_encrypt,
+	.encrypt	= ctr_encrypt_sync,
+	.decrypt	= ctr_encrypt_sync,
 }, {
 }, {
 	.base = {
 	.base = {
 		.cra_name		= "__xts(aes)",
 		.cra_name		= "__xts(aes)",
@@ -460,11 +471,35 @@ static int mac_init(struct shash_desc *desc)
 	return 0;
 	return 0;
 }
 }
 
 
+static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
+			  u8 dg[], int enc_before, int enc_after)
+{
+	int rounds = 6 + ctx->key_length / 4;
+
+	if (may_use_simd()) {
+		kernel_neon_begin();
+		aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
+			       enc_after);
+		kernel_neon_end();
+	} else {
+		if (enc_before)
+			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+
+		while (blocks--) {
+			crypto_xor(dg, in, AES_BLOCK_SIZE);
+			in += AES_BLOCK_SIZE;
+
+			if (blocks || enc_after)
+				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
+						    rounds);
+		}
+	}
+}
+
 static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
 
 	while (len > 0) {
 	while (len > 0) {
 		unsigned int l;
 		unsigned int l;
@@ -476,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 
 
 			len %= AES_BLOCK_SIZE;
 			len %= AES_BLOCK_SIZE;
 
 
-			kernel_neon_begin();
-			aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
-				       ctx->dg, (ctx->len != 0), (len != 0));
-			kernel_neon_end();
+			mac_do_update(&tctx->key, p, blocks, ctx->dg,
+				      (ctx->len != 0), (len != 0));
 
 
 			p += blocks * AES_BLOCK_SIZE;
 			p += blocks * AES_BLOCK_SIZE;
 
 
@@ -507,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
 {
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
 
-	kernel_neon_begin();
-	aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
 
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
 
@@ -522,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 {
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 	u8 *consts = tctx->consts;
 	u8 *consts = tctx->consts;
 
 
 	if (ctx->len != AES_BLOCK_SIZE) {
 	if (ctx->len != AES_BLOCK_SIZE) {
@@ -530,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 		consts += AES_BLOCK_SIZE;
 		consts += AES_BLOCK_SIZE;
 	}
 	}
 
 
-	kernel_neon_begin();
-	aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
 
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
 

+ 45 - 8
arch/arm64/crypto/aes-neonbs-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * Bit sliced AES using NEON instructions
  * Bit sliced AES using NEON instructions
  *
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -9,12 +9,15 @@
  */
  */
 
 
 #include <asm/neon.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 #include <linux/module.h>
 
 
+#include "aes-ctr-fallback.h"
+
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_LICENSE("GPL v2");
 
 
@@ -58,6 +61,11 @@ struct aesbs_cbc_ctx {
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 };
 };
 
 
+struct aesbs_ctr_ctx {
+	struct aesbs_ctx	key;		/* must be first member */
+	struct crypto_aes_ctx	fallback;
+};
+
 struct aesbs_xts_ctx {
 struct aesbs_xts_ctx {
 	struct aesbs_ctx	key;
 	struct aesbs_ctx	key;
 	u32			twkey[AES_MAX_KEYLENGTH_U32];
 	u32			twkey[AES_MAX_KEYLENGTH_U32];
@@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req)
 	return err;
 	return err;
 }
 }
 
 
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+				 unsigned int key_len)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->key.rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 static int ctr_encrypt(struct skcipher_request *req)
 {
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -224,9 +251,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 
 			err = skcipher_walk_done(&walk, 0);
 			err = skcipher_walk_done(&walk, 0);
 			break;
 			break;
@@ -260,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 	return aesbs_setkey(tfm, in_key, key_len);
 	return aesbs_setkey(tfm, in_key, key_len);
 }
 }
 
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+
+	return ctr_encrypt(req);
+}
+
 static int __xts_crypt(struct skcipher_request *req,
 static int __xts_crypt(struct skcipher_request *req,
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]))
 				  int rounds, int blocks, u8 iv[]))
@@ -356,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_priority	= 250 - 1,
 	.base.cra_priority	= 250 - 1,
 	.base.cra_blocksize	= 1,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 	.base.cra_module	= THIS_MODULE,
 
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
 	.min_keysize		= AES_MIN_KEY_SIZE,
@@ -364,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { {
 	.chunksize		= AES_BLOCK_SIZE,
 	.chunksize		= AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_setkey,
-	.encrypt		= ctr_encrypt,
-	.decrypt		= ctr_encrypt,
+	.setkey			= aesbs_ctr_setkey_sync,
+	.encrypt		= ctr_encrypt_sync,
+	.decrypt		= ctr_encrypt_sync,
 }, {
 }, {
 	.base.cra_name		= "__xts(aes)",
 	.base.cra_name		= "__xts(aes)",
 	.base.cra_driver_name	= "__xts-aes-neonbs",
 	.base.cra_driver_name	= "__xts-aes-neonbs",
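
struct aesbs_ctr_ctx relies on its key member being the first field, so the NEON ctr_encrypt() path can keep treating the context as a plain struct aesbs_ctx while the sync fallback reaches the expanded fallback key stored right behind it. A tiny standalone illustration of why that layout is safe (the types are invented for the example):

#include <assert.h>
#include <stddef.h>

struct base_ctx {
        int rounds;
};

struct ctr_ctx {
        struct base_ctx key;            /* must be first member */
        unsigned char fallback_key[32];
};

int main(void)
{
        struct ctr_ctx ctx;

        /* valid only because the base context sits at offset 0 */
        struct base_ctx *base = (struct base_ctx *)&ctx;

        assert((void *)base == (void *)&ctx.key);
        assert(offsetof(struct ctr_ctx, key) == 0);
        return 0;
}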

+ 3 - 2
arch/arm64/crypto/chacha20-neon-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
  * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
  *
  *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@
 
 
 #include <asm/hwcap.h>
 #include <asm/hwcap.h>
 #include <asm/neon.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 
 asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
@@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req)
 	u32 state[16];
 	u32 state[16];
 	int err;
 	int err;
 
 
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);
 		return crypto_chacha20_crypt(req);
 
 
 	err = skcipher_walk_virt(&walk, req, true);
 	err = skcipher_walk_virt(&walk, req, true);

+ 6 - 5
arch/arm64/crypto/crc32-ce-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
  * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
  *
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
 
 
 #include <asm/hwcap.h>
 #include <asm/hwcap.h>
 #include <asm/neon.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <asm/unaligned.h>
 
 
 #define PMULL_MIN_LEN		64L	/* minimum size of buffer
 #define PMULL_MIN_LEN		64L	/* minimum size of buffer
@@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 		length -= l;
 	}
 	}
 
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 		l = round_down(length, SCALE_F);
 
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32_pmull_le(data, l, *crc);
 		*crc = crc32_pmull_le(data, l, *crc);
 		kernel_neon_end();
 		kernel_neon_end();
 
 
@@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 		length -= l;
 	}
 	}
 
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 		l = round_down(length, SCALE_F);
 
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32c_pmull_le(data, l, *crc);
 		*crc = crc32c_pmull_le(data, l, *crc);
 		kernel_neon_end();
 		kernel_neon_end();
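
Both CRC updates now send only a sufficiently long, stride-aligned middle chunk through the PMULL code and finish the rest with the generic table implementation, in addition to the existing byte-wise handling of an unaligned head. The overall shape, with stand-in helpers (crc_generic and crc_simd_bulk are not kernel symbols, and the CRC below is a placeholder, not CRC32):

#include <stddef.h>
#include <stdint.h>

#define SCALE_F         16U     /* SIMD stride */
#define PMULL_MIN_LEN   64U     /* below this, the NEON setup isn't worth it */

static uint32_t crc_generic(uint32_t crc, const uint8_t *p, size_t len)
{
        while (len--)                           /* placeholder, not a real CRC */
                crc = (crc >> 8) ^ ((crc ^ *p++) & 0xff);
        return crc;
}

static uint32_t crc_simd_bulk(uint32_t crc, const uint8_t *p, size_t len)
{
        return crc_generic(crc, p, len);        /* pretend this is the fast path */
}

static uint32_t crc_update(uint32_t crc, const uint8_t *data, size_t len)
{
        if (len >= PMULL_MIN_LEN) {
                size_t bulk = len & ~(size_t)(SCALE_F - 1);     /* round_down */

                crc = crc_simd_bulk(crc, data, bulk);
                data += bulk;
                len -= bulk;
        }
        return crc_generic(crc, data, len);     /* leftover tail */
}

int main(void)
{
        uint8_t buf[200] = { 0 };

        return crc_update(~0u, buf, sizeof(buf)) == 0;
}

The may_use_simd() test added above gates the bulk path as well, so the same generic code picks everything up when NEON is unavailable.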
 
 

+ 9 - 4
arch/arm64/crypto/crct10dif-ce-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
  * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
  *
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +18,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
 
 
 #include <asm/neon.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 
 
@@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
 	}
 	}
 
 
 	if (length > 0) {
 	if (length > 0) {
-		kernel_neon_begin_partial(14);
-		*crc = crc_t10dif_pmull(*crc, data, length);
-		kernel_neon_end();
+		if (may_use_simd()) {
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull(*crc, data, length);
+			kernel_neon_end();
+		} else {
+			*crc = crc_t10dif_generic(*crc, data, length);
+		}
 	}
 	}
 
 
 	return 0;
 	return 0;

+ 383 - 18
arch/arm64/crypto/ghash-ce-core.S

@@ -1,7 +1,7 @@
 /*
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify it
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
  * under the terms of the GNU General Public License version 2 as published
@@ -11,31 +11,215 @@
 #include <linux/linkage.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/assembler.h>
 
 
-	SHASH	.req	v0
-	SHASH2	.req	v1
-	T1	.req	v2
-	T2	.req	v3
-	MASK	.req	v4
-	XL	.req	v5
-	XM	.req	v6
-	XH	.req	v7
-	IN1	.req	v7
+	SHASH		.req	v0
+	SHASH2		.req	v1
+	T1		.req	v2
+	T2		.req	v3
+	MASK		.req	v4
+	XL		.req	v5
+	XM		.req	v6
+	XH		.req	v7
+	IN1		.req	v7
+
+	k00_16		.req	v8
+	k32_48		.req	v9
+
+	t3		.req	v10
+	t4		.req	v11
+	t5		.req	v12
+	t6		.req	v13
+	t7		.req	v14
+	t8		.req	v15
+	t9		.req	v16
+
+	perm1		.req	v17
+	perm2		.req	v18
+	perm3		.req	v19
+
+	sh1		.req	v20
+	sh2		.req	v21
+	sh3		.req	v22
+	sh4		.req	v23
+
+	ss1		.req	v24
+	ss2		.req	v25
+	ss3		.req	v26
+	ss4		.req	v27
 
 
 	.text
 	.text
 	.arch		armv8-a+crypto
 	.arch		armv8-a+crypto
 
 
-	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
-	 */
-ENTRY(pmull_ghash_update)
+	.macro		__pmull_p64, rd, rn, rm
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.endm
+
+	.macro		__pmull2_p64, rd, rn, rm
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endm
+
+	.macro		__pmull_p8, rq, ad, bd
+	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
+	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
+	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3
+
+	__pmull_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull2_p8, rq, ad, bd
+	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
+	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
+	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3
+
+	__pmull2_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_SHASH2, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
+	.endm
+
+	.macro		__pmull2_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
+	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
+	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
+	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
+	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
+	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
+	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
+	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
+	pmull\t		\rq\().8h, \ad, \bd			// D = A*B
+
+	eor		t3.16b, t3.16b, t4.16b			// L = E + F
+	eor		t5.16b, t5.16b, t6.16b			// M = G + H
+	eor		t7.16b, t7.16b, t8.16b			// N = I + J
+
+	uzp1		t4.2d, t3.2d, t5.2d
+	uzp2		t3.2d, t3.2d, t5.2d
+	uzp1		t6.2d, t7.2d, t9.2d
+	uzp2		t7.2d, t7.2d, t9.2d
+
+	// t3 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t4.16b, t4.16b, t3.16b
+	and		t3.16b, t3.16b, k32_48.16b
+
+	// t7 = (N) (P4 + P5) << 24
+	// t9 = (K) (P6 + P7) << 32
+	eor		t6.16b, t6.16b, t7.16b
+	and		t7.16b, t7.16b, k00_16.16b
+
+	eor		t4.16b, t4.16b, t3.16b
+	eor		t6.16b, t6.16b, t7.16b
+
+	zip2		t5.2d, t4.2d, t3.2d
+	zip1		t3.2d, t4.2d, t3.2d
+	zip2		t9.2d, t6.2d, t7.2d
+	zip1		t7.2d, t6.2d, t7.2d
+
+	ext		t3.16b, t3.16b, t3.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t7.16b, t7.16b, t7.16b, #13
+	ext		t9.16b, t9.16b, t9.16b, #12
+
+	eor		t3.16b, t3.16b, t5.16b
+	eor		t7.16b, t7.16b, t9.16b
+	eor		\rq\().16b, \rq\().16b, t3.16b
+	eor		\rq\().16b, \rq\().16b, t7.16b
+	.endm
+
+	.macro		__pmull_pre_p64
+	movi		MASK.16b, #0xe1
+	shl		MASK.2d, MASK.2d, #57
+	.endm
+
+	.macro		__pmull_pre_p8
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	// prepare the permutation vectors
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	movi		T1.8b, #8
+	dup		perm1.2d, x5
+	eor		perm1.16b, perm1.16b, T1.16b
+	ushr		perm2.2d, perm1.2d, #8
+	ushr		perm3.2d, perm1.2d, #16
+	ushr		T1.2d, perm1.2d, #24
+	sli		perm2.2d, perm1.2d, #56
+	sli		perm3.2d, perm1.2d, #48
+	sli		T1.2d, perm1.2d, #40
+
+	// precompute loop invariants
+	tbl		sh1.16b, {SHASH.16b}, perm1.16b
+	tbl		sh2.16b, {SHASH.16b}, perm2.16b
+	tbl		sh3.16b, {SHASH.16b}, perm3.16b
+	tbl		sh4.16b, {SHASH.16b}, T1.16b
+	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
+	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
+	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
+	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	pmull		T2.1q, XL.1d, MASK.1d
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	eor		XL.16b, XM.16b, T2.16b
+	ext		T2.16b, XL.16b, XL.16b, #8
+	pmull		XL.1q, XL.1d, MASK.1d
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XL.d[1], XM.d[0]
+	mov		XH.d[0], XM.d[1]
+
+	shl		T1.2d, XL.2d, #57
+	shl		T2.2d, XL.2d, #62
+	eor		T2.16b, T2.16b, T1.16b
+	shl		T1.2d, XL.2d, #63
+	eor		T2.16b, T2.16b, T1.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, T2.16b, T1.16b
+
+	mov		XL.d[1], T2.d[0]
+	mov		XH.d[0], T2.d[1]
+
+	ushr		T2.2d, XL.2d, #1
+	eor		XH.16b, XH.16b, XL.16b
+	eor		XL.16b, XL.16b, T2.16b
+	ushr		T2.2d, T2.2d, #6
+	ushr		XL.2d, XL.2d, #1
+	.endm
+
+	.macro		__pmull_ghash, pn
 	ld1		{SHASH.2d}, [x3]
 	ld1		{SHASH.2d}, [x3]
 	ld1		{XL.2d}, [x1]
 	ld1		{XL.2d}, [x1]
-	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 
+	__pmull_pre_\pn
+
 	/* do the head block first, if supplied */
 	/* do the head block first, if supplied */
 	cbz		x4, 0f
 	cbz		x4, 0f
 	ld1		{T1.2d}, [x4]
 	ld1		{T1.2d}, [x4]
@@ -52,28 +236,209 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T1.16b, T1.16b, T2.16b
 	eor		T1.16b, T1.16b, T2.16b
 	eor		XL.16b, XL.16b, IN1.16b
 	eor		XL.16b, XL.16b, IN1.16b
 
 
+	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
+	eor		T1.16b, T1.16b, XL.16b
+	__pmull_\pn 	XL, XL, SHASH			// a0 * b0
+	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)
+
+	eor		T2.16b, XL.16b, XH.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		XM.16b, XM.16b, T2.16b
+
+	__pmull_reduce_\pn
+
+	eor		T2.16b, T2.16b, XH.16b
+	eor		XL.16b, XL.16b, T2.16b
+
+	cbnz		w0, 0b
+
+	st1		{XL.2d}, [x1]
+	ret
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	__pmull_ghash	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	__pmull_ghash	p8
+ENDPROC(pmull_ghash_update_p8)
+
+	KS		.req	v8
+	CTR		.req	v9
+	INP		.req	v10
+
+	.macro		load_round_keys, rounds, rk
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
+	.endm
+
+	.macro		enc_round, state, key
+	aese		\state\().16b, \key\().16b
+	aesmc		\state\().16b, \state\().16b
+	.endm
+
+	.macro		enc_block, state, rounds
+	cmp		\rounds, #12
+	b.lo		2222f		/* 128 bits */
+	b.eq		1111f		/* 192 bits */
+	enc_round	\state, v17
+	enc_round	\state, v18
+1111:	enc_round	\state, v19
+	enc_round	\state, v20
+2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	enc_round	\state, \key
+	.endr
+	aese		\state\().16b, v30.16b
+	eor		\state\().16b, \state\().16b, v31.16b
+	.endm
+
+	.macro		pmull_gcm_do_crypt, enc
+	ld1		{SHASH.2d}, [x4]
+	ld1		{XL.2d}, [x1]
+	ldr		x8, [x5, #8]			// load lower counter
+
+	movi		MASK.16b, #0xe1
+	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(	rev		x8, x8		)
+	shl		MASK.2d, MASK.2d, #57
+	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
+
+	.if		\enc == 1
+	ld1		{KS.16b}, [x7]
+	.endif
+
+0:	ld1		{CTR.8b}, [x5]			// load upper counter
+	ld1		{INP.16b}, [x3], #16
+	rev		x9, x8
+	add		x8, x8, #1
+	sub		w0, w0, #1
+	ins		CTR.d[1], x9			// set lower counter
+
+	.if		\enc == 1
+	eor		INP.16b, INP.16b, KS.16b	// encrypt input
+	st1		{INP.16b}, [x2], #16
+	.endif
+
+	rev64		T1.16b, INP.16b
+
+	cmp		w6, #12
+	b.ge		2f				// AES-192/256?
+
+1:	enc_round	CTR, v21
+
+	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		IN1.16b, T1.16b, T1.16b, #8
+
+	enc_round	CTR, v22
+
+	eor		T1.16b, T1.16b, T2.16b
+	eor		XL.16b, XL.16b, IN1.16b
+
+	enc_round	CTR, v23
+
 	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
 	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
 	eor		T1.16b, T1.16b, XL.16b
 	eor		T1.16b, T1.16b, XL.16b
+
+	enc_round	CTR, v24
+
 	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
 	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
 	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
 	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
 
 
+	enc_round	CTR, v25
+
 	ext		T1.16b, XL.16b, XH.16b, #8
 	ext		T1.16b, XL.16b, XH.16b, #8
 	eor		T2.16b, XL.16b, XH.16b
 	eor		T2.16b, XL.16b, XH.16b
 	eor		XM.16b, XM.16b, T1.16b
 	eor		XM.16b, XM.16b, T1.16b
+
+	enc_round	CTR, v26
+
 	eor		XM.16b, XM.16b, T2.16b
 	eor		XM.16b, XM.16b, T2.16b
 	pmull		T2.1q, XL.1d, MASK.1d
 	pmull		T2.1q, XL.1d, MASK.1d
 
 
+	enc_round	CTR, v27
+
 	mov		XH.d[0], XM.d[1]
 	mov		XH.d[0], XM.d[1]
 	mov		XM.d[1], XL.d[0]
 	mov		XM.d[1], XL.d[0]
 
 
+	enc_round	CTR, v28
+
 	eor		XL.16b, XM.16b, T2.16b
 	eor		XL.16b, XM.16b, T2.16b
+
+	enc_round	CTR, v29
+
 	ext		T2.16b, XL.16b, XL.16b, #8
 	ext		T2.16b, XL.16b, XL.16b, #8
+
+	aese		CTR.16b, v30.16b
+
 	pmull		XL.1q, XL.1d, MASK.1d
 	pmull		XL.1q, XL.1d, MASK.1d
 	eor		T2.16b, T2.16b, XH.16b
 	eor		T2.16b, T2.16b, XH.16b
+
+	eor		KS.16b, CTR.16b, v31.16b
+
 	eor		XL.16b, XL.16b, T2.16b
 	eor		XL.16b, XL.16b, T2.16b
 
 
+	.if		\enc == 0
+	eor		INP.16b, INP.16b, KS.16b
+	st1		{INP.16b}, [x2], #16
+	.endif
+
 	cbnz		w0, 0b
 	cbnz		w0, 0b
 
 
+CPU_LE(	rev		x8, x8		)
 	st1		{XL.2d}, [x1]
 	st1		{XL.2d}, [x1]
+	str		x8, [x5, #8]			// store lower counter
+
+	.if		\enc == 1
+	st1		{KS.16b}, [x7]
+	.endif
+
+	ret
+
+2:	b.eq		3f				// AES-192?
+	enc_round	CTR, v17
+	enc_round	CTR, v18
+3:	enc_round	CTR, v19
+	enc_round	CTR, v20
+	b		1b
+	.endm
+
+	/*
+	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds, u8 ks[])
+	 */
+ENTRY(pmull_gcm_encrypt)
+	pmull_gcm_do_crypt	1
+ENDPROC(pmull_gcm_encrypt)
+
+	/*
+	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds)
+	 */
+ENTRY(pmull_gcm_decrypt)
+	pmull_gcm_do_crypt	0
+ENDPROC(pmull_gcm_decrypt)
+
+	/*
+	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
+	 */
+ENTRY(pmull_gcm_encrypt_block)
+	cbz		x2, 0f
+	load_round_keys	w3, x2
+0:	ld1		{v0.16b}, [x1]
+	enc_block	v0, w3
+	st1		{v0.16b}, [x0]
 	ret
 	ret
-ENDPROC(pmull_ghash_update)
+ENDPROC(pmull_gcm_encrypt_block)
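
The p8 path above has to rebuild a 64x64-bit carry-less product from 8x8-bit polynomial multiplies, table permutations and shifts, because CPUs without the Crypto Extensions only implement the byte-wide PMULL. For reference, this is the operation a single PMULL/PMULL2 performs on the p64 path, modelled bit by bit in plain C (clmul64 is only a correctness reference, nothing like the real code's performance):

#include <stdint.h>
#include <stdio.h>

struct u128 { uint64_t lo, hi; };

/* carry-less (polynomial) multiply over GF(2): XOR instead of addition */
static struct u128 clmul64(uint64_t a, uint64_t b)
{
        struct u128 r = { 0, 0 };

        for (int i = 0; i < 64; i++) {
                if ((b >> i) & 1) {
                        r.lo ^= a << i;
                        if (i)
                                r.hi ^= a >> (64 - i);
                }
        }
        return r;
}

int main(void)
{
        struct u128 r = clmul64(0x87, 0x1b);    /* small GF(2)[x] example */

        printf("%016llx%016llx\n", (unsigned long long)r.hi,
               (unsigned long long)r.lo);
        return 0;
}

GHASH then reduces such 128-bit products modulo the GCM polynomial, which is what the two __pmull_reduce_* macros implement for their respective paths.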

+ 482 - 35
arch/arm64/crypto/ghash-ce-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify it
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
  * under the terms of the GNU General Public License version 2 as published
@@ -9,22 +9,33 @@
  */
  */
 
 
 #include <asm/neon.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/module.h>
 
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
 #define GHASH_DIGEST_SIZE	16
+#define GCM_IV_SIZE		12
 
 
 struct ghash_key {
 struct ghash_key {
 	u64 a;
 	u64 a;
 	u64 b;
 	u64 b;
+	be128 k;
 };
 };
 
 
 struct ghash_desc_ctx {
 struct ghash_desc_ctx {
@@ -33,8 +44,35 @@ struct ghash_desc_ctx {
 	u32 count;
 	u32 count;
 };
 };
 
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+struct gcm_aes_ctx {
+	struct crypto_aes_ctx	aes_key;
+	struct ghash_key	ghash_key;
+};
+
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
+
+asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds, u8 ks[]);
+
+asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds);
+
+asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
+					u32 const rk[], int rounds);
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
 
 
 static int ghash_init(struct shash_desc *desc)
 static int ghash_init(struct shash_desc *desc)
 {
 {
@@ -44,6 +82,36 @@ static int ghash_init(struct shash_desc *desc)
 	return 0;
 	return 0;
 }
 }
 
 
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+			    struct ghash_key *key, const char *head)
+{
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, dg, src, key, head);
+		kernel_neon_end();
+	} else {
+		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+		do {
+			const u8 *in = src;
+
+			if (head) {
+				in = head;
+				blocks++;
+				head = NULL;
+			} else {
+				src += GHASH_BLOCK_SIZE;
+			}
+
+			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+			gf128mul_lle(&dst, &key->k);
+		} while (--blocks);
+
+		dg[0] = be64_to_cpu(dst.b);
+		dg[1] = be64_to_cpu(dst.a);
+	}
+}
+
 static int ghash_update(struct shash_desc *desc, const u8 *src,
 static int ghash_update(struct shash_desc *desc, const u8 *src,
 			unsigned int len)
 			unsigned int len)
 {
 {
@@ -67,10 +135,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 		blocks = len / GHASH_BLOCK_SIZE;
 		blocks = len / GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(blocks, ctx->digest, src, key,
-				   partial ? ctx->buf : NULL);
-		kernel_neon_end();
+		ghash_do_update(blocks, ctx->digest, src, key,
+				partial ? ctx->buf : NULL);
+
 		src += blocks * GHASH_BLOCK_SIZE;
 		src += blocks * GHASH_BLOCK_SIZE;
 		partial = 0;
 		partial = 0;
 	}
 	}
@@ -89,9 +156,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
-		kernel_neon_end();
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
 	}
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -100,16 +165,13 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	return 0;
 	return 0;
 }
 }
 
 
-static int ghash_setkey(struct crypto_shash *tfm,
-			const u8 *inkey, unsigned int keylen)
+static int __ghash_setkey(struct ghash_key *key,
+			  const u8 *inkey, unsigned int keylen)
 {
 {
-	struct ghash_key *key = crypto_shash_ctx(tfm);
 	u64 a, b;
 	u64 a, b;
 
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
+	/* needed for the fallback */
+	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
 
 
 	/* perform multiplication by 'x' in GF(2^128) */
 	/* perform multiplication by 'x' in GF(2^128) */
 	b = get_unaligned_be64(inkey);
 	b = get_unaligned_be64(inkey);
@@ -124,33 +186,418 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	return 0;
 	return 0;
 }
 }
 
 
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return __ghash_setkey(key, inkey, keylen);
+}
+
 static struct shash_alg ghash_alg = {
 static struct shash_alg ghash_alg = {
-	.digestsize	= GHASH_DIGEST_SIZE,
-	.init		= ghash_init,
-	.update		= ghash_update,
-	.final		= ghash_final,
-	.setkey		= ghash_setkey,
-	.descsize	= sizeof(struct ghash_desc_ctx),
-	.base		= {
-		.cra_name		= "ghash",
-		.cra_driver_name	= "ghash-ce",
-		.cra_priority		= 200,
-		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize		= GHASH_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct ghash_key),
-		.cra_module		= THIS_MODULE,
-	},
+	.base.cra_name		= "ghash",
+	.base.cra_driver_name	= "ghash-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= GHASH_DIGEST_SIZE,
+	.init			= ghash_init,
+	.update			= ghash_update,
+	.final			= ghash_final,
+	.setkey			= ghash_setkey,
+	.descsize		= sizeof(struct ghash_desc_ctx),
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
+		      unsigned int keylen)
+{
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	u8 key[GHASH_BLOCK_SIZE];
+	int ret;
+
+	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+	if (ret) {
+		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
+			    num_rounds(&ctx->aes_key));
+
+	return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
+}
+
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12 ... 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+			   int *buf_count, struct gcm_aes_ctx *ctx)
+{
+	if (*buf_count > 0) {
+		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+		memcpy(&buf[*buf_count], src, buf_added);
+
+		*buf_count += buf_added;
+		src += buf_added;
+		count -= buf_added;
+	}
+
+	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+		int blocks = count / GHASH_BLOCK_SIZE;
+
+		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
+				*buf_count ? buf : NULL);
+
+		src += blocks * GHASH_BLOCK_SIZE;
+		count %= GHASH_BLOCK_SIZE;
+		*buf_count = 0;
+	}
+
+	if (count > 0) {
+		memcpy(buf, src, count);
+		*buf_count = count;
+	}
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	u8 buf[GHASH_BLOCK_SIZE];
+	struct scatter_walk walk;
+	u32 len = req->assoclen;
+	int buf_count = 0;
+
+	scatterwalk_start(&walk, req->src);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, len);
+		u8 *p;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		p = scatterwalk_map(&walk);
+
+		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+		len -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	} while (len);
+
+	if (buf_count) {
+		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+	}
+}
+
+static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
+		      u64 dg[], u8 tag[], int cryptlen)
+{
+	u8 mac[AES_BLOCK_SIZE];
+	u128 lengths;
+
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64(cryptlen * 8);
+
+	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+
+	put_unaligned_be64(dg[1], mac);
+	put_unaligned_be64(dg[0], mac + 8);
+
+	crypto_xor(tag, mac, AES_BLOCK_SIZE);
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 ks[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		pmull_gcm_encrypt_block(ks, iv, NULL,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key), ks);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    ks, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+					walk.dst.virt.addr, &ctx->ghash_key,
+					NULL);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		u8 buf[GHASH_BLOCK_SIZE];
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
+			       walk.nbytes);
+
+		memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen);
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key));
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			pmull_gcm_encrypt_block(iv, iv, NULL,
+						num_rounds(&ctx->aes_key));
+
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			ghash_do_update(blocks, dg, walk.src.virt.addr,
+					&ctx->ghash_key, NULL);
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    buf, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		memcpy(buf, walk.src.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
+			       walk.nbytes);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(buf, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(tag, buf, authsize))
+		return -EBADMSG;
+	return 0;
+}
+
+static struct aead_alg gcm_aes_alg = {
+	.ivsize			= GCM_IV_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.maxauthsize		= AES_BLOCK_SIZE,
+	.setkey			= gcm_setkey,
+	.setauthsize		= gcm_setauthsize,
+	.encrypt		= gcm_encrypt,
+	.decrypt		= gcm_decrypt,
+
+	.base.cra_name		= "gcm(aes)",
+	.base.cra_driver_name	= "gcm-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
 };
 };
 
 
 static int __init ghash_ce_mod_init(void)
 static int __init ghash_ce_mod_init(void)
 {
 {
-	return crypto_register_shash(&ghash_alg);
+	int ret;
+
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+
+	if (elf_hwcap & HWCAP_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
+	ret = crypto_register_shash(&ghash_alg);
+	if (ret)
+		return ret;
+
+	if (elf_hwcap & HWCAP_PMULL) {
+		ret = crypto_register_aead(&gcm_aes_alg);
+		if (ret)
+			crypto_unregister_shash(&ghash_alg);
+	}
+	return ret;
 }
 }
 
 
 static void __exit ghash_ce_mod_exit(void)
 static void __exit ghash_ce_mod_exit(void)
 {
 {
 	crypto_unregister_shash(&ghash_alg);
 	crypto_unregister_shash(&ghash_alg);
+	crypto_unregister_aead(&gcm_aes_alg);
 }
 }
 
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+	{ cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
 module_exit(ghash_ce_mod_exit);
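
The new GCM code follows the usual layout: the 12-byte IV is extended with a 32-bit big-endian block counter, counter value 1 produces the block that masks the final tag, the data blocks start at counter 2, and gcm_final() feeds the bit lengths of the associated data and the ciphertext into GHASH as the last block. A small standalone sketch of just those fixed layout rules (put_be32/put_be64 are local helpers here, standing in for the kernel's unaligned accessors):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define GCM_IV_SIZE     12
#define AES_BLOCK_SIZE  16

static void put_be32(uint8_t *p, uint32_t v)
{
        p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = (uint8_t)v;
}

static void put_be64(uint8_t *p, uint64_t v)
{
        put_be32(p, (uint32_t)(v >> 32));
        put_be32(p + 4, (uint32_t)v);
}

int main(void)
{
        const uint8_t nonce[GCM_IV_SIZE] = "0123456789ab";
        uint8_t j0[AES_BLOCK_SIZE], ctr[AES_BLOCK_SIZE], lengths[AES_BLOCK_SIZE];
        uint64_t assoclen = 20, cryptlen = 100;         /* bytes */

        memcpy(j0, nonce, GCM_IV_SIZE);
        put_be32(j0 + GCM_IV_SIZE, 1);          /* E(K, J0) masks the tag */

        memcpy(ctr, j0, AES_BLOCK_SIZE);
        put_be32(ctr + GCM_IV_SIZE, 2);         /* first counter block for data */

        put_be64(lengths, assoclen * 8);        /* bit counts, as in gcm_final() */
        put_be64(lengths + 8, cryptlen * 8);

        assert(ctr[GCM_IV_SIZE + 3] == 2);
        assert(lengths[7] == 160);              /* 20 bytes of AAD = 160 bits */
        return 0;
}

The HWCAP checks in ghash_ce_mod_init() then decide how much of this runs in hardware: plain ASIMD gets only the p8 GHASH fallback, while PMULL-capable CPUs also register the combined gcm-aes-ce AEAD.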

+ 14 - 4
arch/arm64/crypto/sha1-ce-glue.c

@@ -1,7 +1,7 @@
 /*
 /*
  * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
  * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
  *
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
  */
 
 
 #include <asm/neon.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/sha.h>
@@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 {
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
 
+	if (!may_use_simd())
+		return crypto_sha1_update(desc, data, len);
+
 	sctx->finalize = 0;
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 			    (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
 	kernel_neon_end();
@@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, data, len, out);
+
 	/*
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	 */
 	sctx->finalize = finalize;
 	sctx->finalize = finalize;
 
 
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 			    (sha1_block_fn *)sha1_ce_transform);
 	if (!finalize)
 	if (!finalize)
@@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, NULL, 0, out);
+
 	sctx->finalize = 0;
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
 	kernel_neon_end();
 	return sha1_base_finish(desc, out);
 	return sha1_base_finish(desc, out);
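
The finalize flag set in sha1_ce_finup() encodes the comment next to it: the asm transform may only fuse the padding and finalization into the same pass when the whole message arrives in this single call and ends exactly on a block boundary. As a predicate (illustrative only, mirroring !sctx->sst.count && !(len % SHA1_BLOCK_SIZE)):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SHA1_BLOCK_SIZE 64

static bool can_fuse_finalize(uint64_t bytes_already_hashed, unsigned int len)
{
        /* whole message in one finup() call, ending on a block boundary */
        return bytes_already_hashed == 0 && (len % SHA1_BLOCK_SIZE) == 0;
}

int main(void)
{
        assert(can_fuse_finalize(0, 2 * SHA1_BLOCK_SIZE));
        assert(!can_fuse_finalize(64, 64));     /* update() already ran */
        assert(!can_fuse_finalize(0, 65));      /* partial final block */
        return 0;
}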

+ 26 - 4
arch/arm64/crypto/sha2-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
 						 finalize);
 
+asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
@@ -54,13 +61,22 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
+	if (!may_use_simd()) {
+		if (len)
+			sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	if (!finalize)
@@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd()) {
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
 	return sha256_base_finish(desc, out);
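
All of the arm64 SHA hunks above follow the same shape: when the NEON register file cannot be claimed (may_use_simd() returns false, e.g. when called from interrupt context), fall back to a scalar block function instead of entering a kernel_neon_begin()/kernel_neon_end() section. A minimal sketch of that pattern, with hypothetical do_blocks_neon()/do_blocks_scalar() helpers standing in for the per-algorithm transforms:

#include <asm/neon.h>		/* kernel_neon_begin()/kernel_neon_end() */
#include <asm/simd.h>		/* may_use_simd() */
#include <crypto/internal/hash.h>

/* Sketch only: do_blocks_neon() and do_blocks_scalar() are placeholders
 * for a real driver's CE/NEON transform and generic block function.
 */
static int example_update(struct shash_desc *desc, const u8 *data,
			  unsigned int len)
{
	if (!may_use_simd())		/* NEON not usable in this context */
		return do_blocks_scalar(desc, data, len);

	kernel_neon_begin();		/* claim the NEON unit */
	do_blocks_neon(desc, data, len);
	kernel_neon_end();
	return 0;
}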

+ 1 - 0
arch/arm64/crypto/sha256-glue.c

@@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256");
 
 asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
+EXPORT_SYMBOL(sha256_block_data_order);
 
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
 				  unsigned int num_blks);

+ 1 - 2
arch/sparc/crypto/aes_glue.c

@@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
 
 	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
 			      keystream, AES_BLOCK_SIZE);
-	crypto_xor((u8 *) keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 

+ 2 - 2
arch/x86/crypto/aesni-intel_glue.c

@@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	aesni_enc(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
+
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 

+ 1 - 2
arch/x86/crypto/blowfish_glue.c

@@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	unsigned int nbytes = walk->nbytes;
 
 	blowfish_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }

+ 1 - 2
arch/x86/crypto/cast5_avx_glue.c

@@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	unsigned int nbytes = walk->nbytes;
 
 	__cast5_encrypt(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }

+ 1 - 2
arch/x86/crypto/des3_ede_glue.c

@@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	des3_ede_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }
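
Each of the ctr_crypt_final() hunks above replaces a crypto_xor() into the keystream buffer followed by a memcpy() to the destination with a single crypto_xor_cpy() call, which writes src1 ^ src2 directly into a separate dst. A short illustration of the before/after shape (the buffer names are illustrative, not taken from any one of the drivers above):

#include <crypto/algapi.h>	/* crypto_xor(), crypto_xor_cpy() */
#include <linux/string.h>

/* Old two-pass form: XOR the partial block in place, then copy it out. */
static void ctr_tail_old(u8 *dst, const u8 *src, u8 *keystream,
			 unsigned int nbytes)
{
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);
}

/* New single-pass form: dst = keystream ^ src. */
static void ctr_tail_new(u8 *dst, const u8 *src, const u8 *keystream,
			 unsigned int nbytes)
{
	crypto_xor_cpy(dst, keystream, src, nbytes);
}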

+ 2 - 0
crypto/Kconfig

@@ -1753,6 +1753,8 @@ config CRYPTO_USER_API_AEAD
 	tristate "User-space interface for AEAD cipher algorithms"
 	depends on NET
 	select CRYPTO_AEAD
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_NULL
 	select CRYPTO_USER_API
 	help
 	  This option enables the user-spaces interface for AEAD

+ 691 - 0
crypto/af_alg.c

@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/rwsem.h>
+#include <linux/sched/signal.h>
 #include <linux/security.h>
 
 struct alg_type_list {
@@ -507,6 +508,696 @@ void af_alg_complete(struct crypto_async_request *req, int err)
 }
 EXPORT_SYMBOL_GPL(af_alg_complete);
 
+/**
+ * af_alg_alloc_tsgl - allocate the TX SGL
+ *
+ * @sk socket of connection to user space
+ * @return: 0 upon success, < 0 upon error
+ */
+int af_alg_alloc_tsgl(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct scatterlist *sg = NULL;
+
+	sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+	if (!list_empty(&ctx->tsgl_list))
+		sg = sgl->sg;
+
+	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
+		sgl = sock_kmalloc(sk, sizeof(*sgl) +
+				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+				   GFP_KERNEL);
+		if (!sgl)
+			return -ENOMEM;
+
+		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
+		sgl->cur = 0;
+
+		if (sg)
+			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+
+		list_add_tail(&sgl->list, &ctx->tsgl_list);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_tsgl);
+
+/**
+ * aead_count_tsgl - Count number of TX SG entries
+ *
+ * The counting starts from the beginning of the SGL to @bytes. If
+ * an offset is provided, the counting of the SG entries starts at the offset.
+ *
+ * @sk socket of connection to user space
+ * @bytes Count the number of SG entries holding given number of bytes.
+ * @offset Start the counting of SG entries from the given offset.
+ * @return Number of TX SG entries found given the constraints
+ */
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl, *tmp;
+	unsigned int i;
+	unsigned int sgl_count = 0;
+
+	if (!bytes)
+		return 0;
+
+	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
+		struct scatterlist *sg = sgl->sg;
+
+		for (i = 0; i < sgl->cur; i++) {
+			size_t bytes_count;
+
+			/* Skip offset */
+			if (offset >= sg[i].length) {
+				offset -= sg[i].length;
+				bytes -= sg[i].length;
+				continue;
+			}
+
+			bytes_count = sg[i].length - offset;
+
+			offset = 0;
+			sgl_count++;
+
+			/* If we have seen requested number of bytes, stop */
+			if (bytes_count >= bytes)
+				return sgl_count;
+
+			bytes -= bytes_count;
+		}
+	}
+
+	return sgl_count;
+}
+EXPORT_SYMBOL_GPL(af_alg_count_tsgl);
+
+/**
+ * aead_pull_tsgl - Release the specified buffers from TX SGL
+ *
+ * If @dst is non-null, reassign the pages to dst. The caller must release
+ * the pages. If @dst_offset is given only reassign the pages to @dst starting
+ * at the @dst_offset (byte). The caller must ensure that @dst is large
+ * enough (e.g. by using af_alg_count_tsgl with the same offset).
+ *
+ * @sk socket of connection to user space
+ * @used Number of bytes to pull from TX SGL
+ * @dst If non-NULL, buffer is reassigned to dst SGL instead of releasing. The
+ *	caller must release the buffers in dst.
+ * @dst_offset Reassign the TX SGL from given offset. All buffers before
+ *	       reaching the offset is released.
+ */
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
+		      size_t dst_offset)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct scatterlist *sg;
+	unsigned int i, j;
+
+	while (!list_empty(&ctx->tsgl_list)) {
+		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
+				       list);
+		sg = sgl->sg;
+
+		for (i = 0, j = 0; i < sgl->cur; i++) {
+			size_t plen = min_t(size_t, used, sg[i].length);
+			struct page *page = sg_page(sg + i);
+
+			if (!page)
+				continue;
+
+			/*
+			 * Assumption: caller created af_alg_count_tsgl(len)
+			 * SG entries in dst.
+			 */
+			if (dst) {
+				if (dst_offset >= plen) {
+					/* discard page before offset */
+					dst_offset -= plen;
+				} else {
+					/* reassign page to dst after offset */
+					get_page(page);
+					sg_set_page(dst + j, page,
+						    plen - dst_offset,
+						    sg[i].offset + dst_offset);
+					dst_offset = 0;
+					j++;
+				}
+			}
+
+			sg[i].length -= plen;
+			sg[i].offset += plen;
+
+			used -= plen;
+			ctx->used -= plen;
+
+			if (sg[i].length)
+				return;
+
+			put_page(page);
+			sg_assign_page(sg + i, NULL);
+		}
+
+		list_del(&sgl->list);
+		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
+						     (MAX_SGL_ENTS + 1));
+	}
+
+	if (!ctx->used)
+		ctx->merge = 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_pull_tsgl);
+
+/**
+ * af_alg_free_areq_sgls - Release TX and RX SGLs of the request
+ *
+ * @areq Request holding the TX and RX SGL
+ */
+void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
+{
+	struct sock *sk = areq->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_rsgl *rsgl, *tmp;
+	struct scatterlist *tsgl;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
+		ctx->rcvused -= rsgl->sg_num_bytes;
+		af_alg_free_sg(&rsgl->sgl);
+		list_del(&rsgl->list);
+		if (rsgl != &areq->first_rsgl)
+			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+	}
+
+	tsgl = areq->tsgl;
+	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+		if (!sg_page(sg))
+			continue;
+		put_page(sg_page(sg));
+	}
+
+	if (areq->tsgl && areq->tsgl_entries)
+		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+}
+EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
+
+/**
+ * af_alg_wait_for_wmem - wait for availability of writable memory
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @return 0 when writable memory is available, < 0 upon error
+ */
+int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int err = -ERESTARTSYS;
+	long timeout;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, af_alg_writable(sk), &wait)) {
+			err = 0;
+			break;
+		}
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_wmem);
+
+/**
+ * af_alg_wmem_wakeup - wakeup caller when writable memory is available
+ *
+ * @sk socket of connection to user space
+ */
+void af_alg_wmem_wakeup(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	if (!af_alg_writable(sk))
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
+
+/**
+ * af_alg_wait_for_data - wait for availability of TX data
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, then only report if function would sleep
+ * @return 0 when writable memory is available, < 0 upon error
+ */
+int af_alg_wait_for_data(struct sock *sk, unsigned flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	long timeout;
+	int err = -ERESTARTSYS;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, (ctx->used || !ctx->more),
+				  &wait)) {
+			err = 0;
+			break;
+		}
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_data);
+
+/**
+ * af_alg_data_wakeup - wakeup caller when new data can be sent to kernel
+ *
+ * @sk socket of connection to user space
+ */
+
+void af_alg_data_wakeup(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct socket_wq *wq;
+
+	if (!ctx->used)
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_data_wakeup);
+
+/**
+ * af_alg_sendmsg - implementation of sendmsg system call handler
+ *
+ * The sendmsg system call handler obtains the user data and stores it
+ * in ctx->tsgl_list. This implies allocation of the required numbers of
+ * struct af_alg_tsgl.
+ *
+ * In addition, the ctx is filled with the information sent via CMSG.
+ *
+ * @sock socket of connection to user space
+ * @msg message from user space
+ * @size size of message from user space
+ * @ivsize the size of the IV for the cipher operation to verify that the
+ *	   user-space-provided IV has the right size
+ * @return the number of copied data upon success, < 0 upon error
+ */
+int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		   unsigned int ivsize)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct af_alg_control con = {};
+	long copied = 0;
+	bool enc = 0;
+	bool init = 0;
+	int err = 0;
+
+	if (msg->msg_controllen) {
+		err = af_alg_cmsg_send(msg, &con);
+		if (err)
+			return err;
+
+		init = 1;
+		switch (con.op) {
+		case ALG_OP_ENCRYPT:
+			enc = 1;
+			break;
+		case ALG_OP_DECRYPT:
+			enc = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (con.iv && con.iv->ivlen != ivsize)
+			return -EINVAL;
+	}
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (init) {
+		ctx->enc = enc;
+		if (con.iv)
+			memcpy(ctx->iv, con.iv->iv, ivsize);
+
+		ctx->aead_assoclen = con.aead_assoclen;
+	}
+
+	while (size) {
+		struct scatterlist *sg;
+		size_t len = size;
+		size_t plen;
+
+		/* use the existing memory in an allocated page */
+		if (ctx->merge) {
+			sgl = list_entry(ctx->tsgl_list.prev,
+					 struct af_alg_tsgl, list);
+			sg = sgl->sg + sgl->cur - 1;
+			len = min_t(size_t, len,
+				    PAGE_SIZE - sg->offset - sg->length);
+
+			err = memcpy_from_msg(page_address(sg_page(sg)) +
+					      sg->offset + sg->length,
+					      msg, len);
+			if (err)
+				goto unlock;
+
+			sg->length += len;
+			ctx->merge = (sg->offset + sg->length) &
+				     (PAGE_SIZE - 1);
+
+			ctx->used += len;
+			copied += len;
+			size -= len;
+			continue;
+		}
+
+		if (!af_alg_writable(sk)) {
+			err = af_alg_wait_for_wmem(sk, msg->msg_flags);
+			if (err)
+				goto unlock;
+		}
+
+		/* allocate a new page */
+		len = min_t(unsigned long, len, af_alg_sndbuf(sk));
+
+		err = af_alg_alloc_tsgl(sk);
+		if (err)
+			goto unlock;
+
+		sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl,
+				 list);
+		sg = sgl->sg;
+		if (sgl->cur)
+			sg_unmark_end(sg + sgl->cur - 1);
+
+		do {
+			unsigned int i = sgl->cur;
+
+			plen = min_t(size_t, len, PAGE_SIZE);
+
+			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
+			if (!sg_page(sg + i)) {
+				err = -ENOMEM;
+				goto unlock;
+			}
+
+			err = memcpy_from_msg(page_address(sg_page(sg + i)),
+					      msg, plen);
+			if (err) {
+				__free_page(sg_page(sg + i));
+				sg_assign_page(sg + i, NULL);
+				goto unlock;
+			}
+
+			sg[i].length = plen;
+			len -= plen;
+			ctx->used += plen;
+			copied += plen;
+			size -= plen;
+			sgl->cur++;
+		} while (len && sgl->cur < MAX_SGL_ENTS);
+
+		if (!size)
+			sg_mark_end(sg + sgl->cur - 1);
+
+		ctx->merge = plen & (PAGE_SIZE - 1);
+	}
+
+	err = 0;
+
+	ctx->more = msg->msg_flags & MSG_MORE;
+
+unlock:
+	af_alg_data_wakeup(sk);
+	release_sock(sk);
+
+	return copied ?: err;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendmsg);
+
+/**
+ * af_alg_sendpage - sendpage system call handler
+ *
+ * This is a generic implementation of sendpage to fill ctx->tsgl_list.
+ */
+ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
+			int offset, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	int err = -EINVAL;
+
+	if (flags & MSG_SENDPAGE_NOTLAST)
+		flags |= MSG_MORE;
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used)
+		goto unlock;
+
+	if (!size)
+		goto done;
+
+	if (!af_alg_writable(sk)) {
+		err = af_alg_wait_for_wmem(sk, flags);
+		if (err)
+			goto unlock;
+	}
+
+	err = af_alg_alloc_tsgl(sk);
+	if (err)
+		goto unlock;
+
+	ctx->merge = 0;
+	sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+
+	if (sgl->cur)
+		sg_unmark_end(sgl->sg + sgl->cur - 1);
+
+	sg_mark_end(sgl->sg + sgl->cur);
+
+	get_page(page);
+	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+	sgl->cur++;
+	ctx->used += size;
+
+done:
+	ctx->more = flags & MSG_MORE;
+
+unlock:
+	af_alg_data_wakeup(sk);
+	release_sock(sk);
+
+	return err ?: size;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendpage);
+
+/**
+ * af_alg_async_cb - AIO callback handler
+ *
+ * This handler cleans up the struct af_alg_async_req upon completion of the
+ * AIO operation.
+ *
+ * The number of bytes to be generated with the AIO operation must be set
+ * in areq->outlen before the AIO callback handler is invoked.
+ */
+void af_alg_async_cb(struct crypto_async_request *_req, int err)
+{
+	struct af_alg_async_req *areq = _req->data;
+	struct sock *sk = areq->sk;
+	struct kiocb *iocb = areq->iocb;
+	unsigned int resultlen;
+
+	lock_sock(sk);
+
+	/* Buffer size written by crypto operation. */
+	resultlen = areq->outlen;
+
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+	__sock_put(sk);
+
+	iocb->ki_complete(iocb, err ? err : resultlen, 0);
+
+	release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_async_cb);
+
+/**
+ * af_alg_poll - poll system call handler
+ */
+unsigned int af_alg_poll(struct file *file, struct socket *sock,
+			 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	unsigned int mask;
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (!ctx->more || ctx->used)
+		mask |= POLLIN | POLLRDNORM;
+
+	if (af_alg_writable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+EXPORT_SYMBOL_GPL(af_alg_poll);
+
+/**
+ * af_alg_alloc_areq - allocate struct af_alg_async_req
+ *
+ * @sk socket of connection to user space
+ * @areqlen size of struct af_alg_async_req + crypto_*_reqsize
+ * @return allocated data structure or ERR_PTR upon error
+ */
+struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
+					   unsigned int areqlen)
+{
+	struct af_alg_async_req *areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
+
+	if (unlikely(!areq))
+		return ERR_PTR(-ENOMEM);
+
+	areq->areqlen = areqlen;
+	areq->sk = sk;
+	areq->last_rsgl = NULL;
+	INIT_LIST_HEAD(&areq->rsgl_list);
+	areq->tsgl = NULL;
+	areq->tsgl_entries = 0;
+
+	return areq;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_areq);
+
+/**
+ * af_alg_get_rsgl - create the RX SGL for the output data from the crypto
+ *		     operation
+ *
+ * @sk socket of connection to user space
+ * @msg user space message
+ * @flags flags used to invoke recvmsg with
+ * @areq instance of the cryptographic request that will hold the RX SGL
+ * @maxsize maximum number of bytes to be pulled from user space
+ * @outlen number of bytes in the RX SGL
+ * @return 0 on success, < 0 upon error
+ */
+int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
+		    struct af_alg_async_req *areq, size_t maxsize,
+		    size_t *outlen)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	size_t len = 0;
+
+	while (maxsize > len && msg_data_left(msg)) {
+		struct af_alg_rsgl *rsgl;
+		size_t seglen;
+		int err;
+
+		/* limit the amount of readable buffers */
+		if (!af_alg_readable(sk))
+			break;
+
+		if (!ctx->used) {
+			err = af_alg_wait_for_data(sk, flags);
+			if (err)
+				return err;
+		}
+
+		seglen = min_t(size_t, (maxsize - len),
+			       msg_data_left(msg));
+
+		if (list_empty(&areq->rsgl_list)) {
+			rsgl = &areq->first_rsgl;
+		} else {
+			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
+			if (unlikely(!rsgl))
+				return -ENOMEM;
+		}
+
+		rsgl->sgl.npages = 0;
+		list_add_tail(&rsgl->list, &areq->rsgl_list);
+
+		/* make one iovec available as scatterlist */
+		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+		if (err < 0)
+			return err;
+
+		/* chain the new scatterlist with previous one */
+		if (areq->last_rsgl)
+			af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
+
+		areq->last_rsgl = rsgl;
+		len += err;
+		ctx->rcvused += err;
+		rsgl->sg_num_bytes = err;
+		iov_iter_advance(&msg->msg_iter, err);
+	}
+
+	*outlen = len;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_get_rsgl);
+
 static int __init af_alg_init(void)
 {
 	int err = proto_register(&alg_proto, 0);
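
The helpers added above are what the reworked algif front ends build their recvmsg paths on. A condensed sketch of that flow, with error unwinding abbreviated and the surrounding socket/tfm setup assumed; the complete versions, including the AIO branch, are in the algif_aead.c and algif_skcipher.c hunks below:

#include <crypto/if_alg.h>

/* Sketch only: how a front end strings the new af_alg helpers together. */
static int recvmsg_flow_sketch(struct sock *sk, struct msghdr *msg, int flags,
			       unsigned int reqsize, size_t maxsize)
{
	struct af_alg_async_req *areq;
	size_t outlen = 0;
	int err;

	/* Per-call request state: struct af_alg_async_req + tfm reqsize. */
	areq = af_alg_alloc_areq(sk, sizeof(*areq) + reqsize);
	if (IS_ERR(areq))
		return PTR_ERR(areq);

	/* Map the caller's iovecs into the RX SGL, up to maxsize bytes. */
	err = af_alg_get_rsgl(sk, msg, flags, areq, maxsize, &outlen);
	if (err)
		goto free;

	/* ... set up areq->cra_u, run the cipher, or queue it for AIO ... */

free:
	af_alg_free_areq_sgls(areq);
	sock_kfree_s(sk, areq, areq->areqlen);
	return err ? err : outlen;
}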

+ 29 - 0
crypto/ahash.c

@@ -588,6 +588,35 @@ int crypto_unregister_ahash(struct ahash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_ahash);
 
+int crypto_register_ahashes(struct ahash_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_ahash(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (--i; i >= 0; --i)
+		crypto_unregister_ahash(&algs[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_ahashes);
+
+void crypto_unregister_ahashes(struct ahash_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_ahash(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_ahashes);
+
 int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst)
 {
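
crypto_register_ahashes() gives drivers the same bulk-registration convenience that already exists for other algorithm types: register a whole array, and unwind the already-registered entries if one of them fails. A minimal sketch of a driver using it (the my_ahash_algs array is a placeholder, not part of this diff):

#include <linux/kernel.h>
#include <crypto/internal/hash.h>

/* Placeholder array; a real driver fills in the base.cra_* fields and the
 * init/update/final callbacks for each entry.
 */
static struct ahash_alg my_ahash_algs[2];

static int my_driver_register(void)
{
	/* On failure, entries registered so far are unregistered again
	 * before the error is returned.
	 */
	return crypto_register_ahashes(my_ahash_algs,
				       ARRAY_SIZE(my_ahash_algs));
}

static void my_driver_unregister(void)
{
	crypto_unregister_ahashes(my_ahash_algs, ARRAY_SIZE(my_ahash_algs));
}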

+ 15 - 10
crypto/algapi.c

@@ -975,13 +975,15 @@ void crypto_inc(u8 *a, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 {
 	int relalign = 0;
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
 		int size = sizeof(unsigned long);
-		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+		int d = (((unsigned long)dst ^ (unsigned long)src1) |
+			 ((unsigned long)dst ^ (unsigned long)src2)) &
+			(size - 1);
 
 		relalign = d ? 1 << __ffs(d) : size;
 
@@ -992,34 +994,37 @@ void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 		 * process the remainder of the input using optimal strides.
 		 */
 		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
-			*dst++ ^= *src++;
+			*dst++ = *src1++ ^ *src2++;
 			len--;
 		}
 	}
 
 	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
-		*(u64 *)dst ^= *(u64 *)src;
+		*(u64 *)dst = *(u64 *)src1 ^  *(u64 *)src2;
 		dst += 8;
-		src += 8;
+		src1 += 8;
+		src2 += 8;
 		len -= 8;
 	}
 
 	while (len >= 4 && !(relalign & 3)) {
-		*(u32 *)dst ^= *(u32 *)src;
+		*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
 		dst += 4;
-		src += 4;
+		src1 += 4;
+		src2 += 4;
 		len -= 4;
 	}
 
 	while (len >= 2 && !(relalign & 1)) {
-		*(u16 *)dst ^= *(u16 *)src;
+		*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
 		dst += 2;
-		src += 2;
+		src1 += 2;
+		src2 += 2;
 		len -= 2;
 	}
 
 	while (len--)
-		*dst++ ^= *src++;
+		*dst++ = *src1++ ^ *src2++;
 }
 EXPORT_SYMBOL_GPL(__crypto_xor);
 

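With __crypto_xor() taking separate source operands, both the in-place and the copying front end can be expressed as thin wrappers over it. A sketch of how the two callers relate; the bodies below are a simplified illustration (the real inline helpers in this series also carry an aligned fast path), not a verbatim copy of include/crypto/algapi.h:

/* In-place XOR: dst ^= src over size bytes. */
static inline void crypto_xor_sketch(u8 *dst, const u8 *src, unsigned int size)
{
	__crypto_xor(dst, dst, src, size);
}

/* XOR-and-copy: dst = src1 ^ src2 over size bytes, with a distinct dst. */
static inline void crypto_xor_cpy_sketch(u8 *dst, const u8 *src1,
					 const u8 *src2, unsigned int size)
{
	__crypto_xor(dst, src1, src2, size);
}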
+ 250 - 614
crypto/algif_aead.c

@@ -5,88 +5,56 @@
  *
  * This file provides the user-space API for AEAD ciphers.
  *
- * This file is derived from algif_skcipher.c.
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
+ *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request is released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
  */
 
 #include <crypto/internal/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/if_alg.h>
+#include <crypto/skcipher.h>
+#include <crypto/null.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-	unsigned int cur;
-	struct scatterlist sg[ALG_MAX_PAGES];
-};
-
-struct aead_async_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-};
-
-struct aead_async_req {
-	struct scatterlist *tsgl;
-	struct aead_async_rsgl first_rsgl;
-	struct list_head list;
-	struct kiocb *iocb;
-	struct sock *sk;
-	unsigned int tsgls;
-	char iv[];
-};
-
 struct aead_tfm {
 	struct crypto_aead *aead;
 	bool has_key;
+	struct crypto_skcipher *null_tfm;
 };
 
-struct aead_ctx {
-	struct aead_sg_list tsgl;
-	struct aead_async_rsgl first_rsgl;
-	struct list_head list;
-
-	void *iv;
-
-	struct af_alg_completion completion;
-
-	unsigned long used;
-
-	unsigned int len;
-	bool more;
-	bool merge;
-	bool enc;
-
-	size_t aead_assoclen;
-	struct aead_request aead_req;
-};
-
-static inline int aead_sndbuf(struct sock *sk)
+static inline bool aead_sufficient_data(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
-}
-
-static inline bool aead_writable(struct sock *sk)
-{
-	return PAGE_SIZE <= aead_sndbuf(sk);
-}
-
-static inline bool aead_sufficient_data(struct aead_ctx *ctx)
-{
-	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int as = crypto_aead_authsize(tfm);
 
 	/*
 	 * The minimum amount of memory needed for an AEAD cipher is
@@ -95,484 +63,58 @@ static inline bool aead_sufficient_data(struct aead_ctx *ctx)
 	return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as);
 }
 
-static void aead_reset_ctx(struct aead_ctx *ctx)
-{
-	struct aead_sg_list *sgl = &ctx->tsgl;
-
-	sg_init_table(sgl->sg, ALG_MAX_PAGES);
-	sgl->cur = 0;
-	ctx->used = 0;
-	ctx->more = 0;
-	ctx->merge = 0;
-}
-
-static void aead_put_sgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct scatterlist *sg = sgl->sg;
-	unsigned int i;
-
-	for (i = 0; i < sgl->cur; i++) {
-		if (!sg_page(sg + i))
-			continue;
-
-		put_page(sg_page(sg + i));
-		sg_assign_page(sg + i, NULL);
-	}
-	aead_reset_ctx(ctx);
-}
-
-static void aead_wmem_wakeup(struct sock *sk)
-{
-	struct socket_wq *wq;
-
-	if (!aead_writable(sk))
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	rcu_read_unlock();
-}
-
-static int aead_wait_for_data(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	long timeout;
-	int err = -ERESTARTSYS;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	return err;
-}
-
-static void aead_data_wakeup(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct socket_wq *wq;
-
-	if (ctx->more)
-		return;
-	if (!ctx->used)
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	rcu_read_unlock();
-}
-
 static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned ivsize =
-		crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct af_alg_control con = {};
-	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
-	int err = -EINVAL;
-
-	if (msg->msg_controllen) {
-		err = af_alg_cmsg_send(msg, &con);
-		if (err)
-			return err;
-
-		init = 1;
-		switch (con.op) {
-		case ALG_OP_ENCRYPT:
-			enc = 1;
-			break;
-		case ALG_OP_DECRYPT:
-			enc = 0;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (con.iv && con.iv->ivlen != ivsize)
-			return -EINVAL;
-	}
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (init) {
-		ctx->enc = enc;
-		if (con.iv)
-			memcpy(ctx->iv, con.iv->iv, ivsize);
-
-		ctx->aead_assoclen = con.aead_assoclen;
-	}
-
-	while (size) {
-		size_t len = size;
-		struct scatterlist *sg = NULL;
-
-		/* use the existing memory in an allocated page */
-		if (ctx->merge) {
-			sg = sgl->sg + sgl->cur - 1;
-			len = min_t(unsigned long, len,
-				    PAGE_SIZE - sg->offset - sg->length);
-			err = memcpy_from_msg(page_address(sg_page(sg)) +
-					      sg->offset + sg->length,
-					      msg, len);
-			if (err)
-				goto unlock;
-
-			sg->length += len;
-			ctx->merge = (sg->offset + sg->length) &
-				     (PAGE_SIZE - 1);
-
-			ctx->used += len;
-			copied += len;
-			size -= len;
-			continue;
-		}
-
-		if (!aead_writable(sk)) {
-			/* user space sent too much data */
-			aead_put_sgl(sk);
-			err = -EMSGSIZE;
-			goto unlock;
-		}
-
-		/* allocate a new page */
-		len = min_t(unsigned long, size, aead_sndbuf(sk));
-		while (len) {
-			size_t plen = 0;
-
-			if (sgl->cur >= ALG_MAX_PAGES) {
-				aead_put_sgl(sk);
-				err = -E2BIG;
-				goto unlock;
-			}
-
-			sg = sgl->sg + sgl->cur;
-			plen = min_t(size_t, len, PAGE_SIZE);
-
-			sg_assign_page(sg, alloc_page(GFP_KERNEL));
-			err = -ENOMEM;
-			if (!sg_page(sg))
-				goto unlock;
-
-			err = memcpy_from_msg(page_address(sg_page(sg)),
-					      msg, plen);
-			if (err) {
-				__free_page(sg_page(sg));
-				sg_assign_page(sg, NULL);
-				goto unlock;
-			}
-
-			sg->offset = 0;
-			sg->length = plen;
-			len -= plen;
-			ctx->used += plen;
-			copied += plen;
-			sgl->cur++;
-			size -= plen;
-			ctx->merge = plen & (PAGE_SIZE - 1);
-		}
-	}
-
-	err = 0;
-
-	ctx->more = msg->msg_flags & MSG_MORE;
-	if (!ctx->more && !aead_sufficient_data(ctx)) {
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-	}
-
-unlock:
-	aead_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: copied;
-}
-
-static ssize_t aead_sendpage(struct socket *sock, struct page *page,
-			     int offset, size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	int err = -EINVAL;
-
-	if (flags & MSG_SENDPAGE_NOTLAST)
-		flags |= MSG_MORE;
-
-	if (sgl->cur >= ALG_MAX_PAGES)
-		return -E2BIG;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (!size)
-		goto done;
-
-	if (!aead_writable(sk)) {
-		/* user space sent too much data */
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-		goto unlock;
-	}
-
-	ctx->merge = 0;
-
-	get_page(page);
-	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-	sgl->cur++;
-	ctx->used += size;
-
-	err = 0;
-
-done:
-	ctx->more = flags & MSG_MORE;
-	if (!ctx->more && !aead_sufficient_data(ctx)) {
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-	}
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int ivsize = crypto_aead_ivsize(tfm);
 
-unlock:
-	aead_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: size;
+	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-#define GET_ASYM_REQ(req, tfm) (struct aead_async_req *) \
-		((char *)req + sizeof(struct aead_request) + \
-		 crypto_aead_reqsize(tfm))
-
- #define GET_REQ_SIZE(tfm) sizeof(struct aead_async_req) + \
-	crypto_aead_reqsize(tfm) + crypto_aead_ivsize(tfm) + \
-	sizeof(struct aead_request)
-
-static void aead_async_cb(struct crypto_async_request *_req, int err)
+static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+				struct scatterlist *src,
+				struct scatterlist *dst, unsigned int len)
 {
-	struct aead_request *req = _req->data;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aead_async_req *areq = GET_ASYM_REQ(req, tfm);
-	struct sock *sk = areq->sk;
-	struct scatterlist *sg = areq->tsgl;
-	struct aead_async_rsgl *rsgl;
-	struct kiocb *iocb = areq->iocb;
-	unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-
-	list_for_each_entry(rsgl, &areq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
+	SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
 
-	for (i = 0; i < areq->tsgls; i++)
-		put_page(sg_page(sg + i));
+	skcipher_request_set_tfm(skreq, null_tfm);
+	skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      NULL, NULL);
+	skcipher_request_set_crypt(skreq, src, dst, len, NULL);
 
-	sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-	sock_kfree_s(sk, req, reqlen);
-	__sock_put(sk);
-	iocb->ki_complete(iocb, err, err);
+	return crypto_skcipher_encrypt(skreq);
 }
 
-static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
-			      int flags)
+static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
+			 size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req);
-	struct aead_async_req *areq;
-	struct aead_request *req = NULL;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct aead_async_rsgl *last_rsgl = NULL, *rsgl;
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
 	unsigned int as = crypto_aead_authsize(tfm);
-	unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-	int err = -ENOMEM;
-	unsigned long used;
-	size_t outlen = 0;
-	size_t usedpages = 0;
-
-	lock_sock(sk);
-	if (ctx->more) {
-		err = aead_wait_for_data(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	if (!aead_sufficient_data(ctx))
-		goto unlock;
-
-	used = ctx->used;
-	if (ctx->enc)
-		outlen = used + as;
-	else
-		outlen = used - as;
-
-	req = sock_kmalloc(sk, reqlen, GFP_KERNEL);
-	if (unlikely(!req))
-		goto unlock;
-
-	areq = GET_ASYM_REQ(req, tfm);
-	memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl));
-	INIT_LIST_HEAD(&areq->list);
-	areq->iocb = msg->msg_iocb;
-	areq->sk = sk;
-	memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm));
-	aead_request_set_tfm(req, tfm);
-	aead_request_set_ad(req, ctx->aead_assoclen);
-	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  aead_async_cb, req);
-	used -= ctx->aead_assoclen;
-
-	/* take over all tx sgls from ctx */
-	areq->tsgl = sock_kmalloc(sk,
-				  sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
-				  GFP_KERNEL);
-	if (unlikely(!areq->tsgl))
-		goto free;
-
-	sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
-	for (i = 0; i < sgl->cur; i++)
-		sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
-			    sgl->sg[i].length, sgl->sg[i].offset);
-
-	areq->tsgls = sgl->cur;
-
-	/* create rx sgls */
-	while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-		size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-				      (outlen - usedpages));
-
-		if (list_empty(&areq->list)) {
-			rsgl = &areq->first_rsgl;
-
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (unlikely(!rsgl)) {
-				err = -ENOMEM;
-				goto free;
-			}
-		}
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &areq->list);
-
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto free;
-
-		usedpages += err;
-
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
-
-		iov_iter_advance(&msg->msg_iter, err);
-	}
-
-	/* ensure output buffer is sufficiently large */
-	if (usedpages < outlen) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	aead_request_set_crypt(req, areq->tsgl, areq->first_rsgl.sgl.sg, used,
-			       areq->iv);
-	err = ctx->enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
-	if (err) {
-		if (err == -EINPROGRESS) {
-			sock_hold(sk);
-			err = -EIOCBQUEUED;
-			aead_reset_ctx(ctx);
-			goto unlock;
-		} else if (err == -EBADMSG) {
-			aead_put_sgl(sk);
-		}
-		goto free;
-	}
-	aead_put_sgl(sk);
-
-free:
-	list_for_each_entry(rsgl, &areq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
-	if (areq->tsgl)
-		sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-	if (req)
-		sock_kfree_s(sk, req, reqlen);
-unlock:
-	aead_wmem_wakeup(sk);
-	release_sock(sk);
-	return err ? err : outlen;
-}
-
-static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct aead_async_rsgl *last_rsgl = NULL;
-	struct aead_async_rsgl *rsgl, *tmp;
-	int err = -EINVAL;
-	unsigned long used = 0;
-	size_t outlen = 0;
-	size_t usedpages = 0;
-
-	lock_sock(sk);
+	struct af_alg_async_req *areq;
+	struct af_alg_tsgl *tsgl;
+	struct scatterlist *src;
+	int err = 0;
+	size_t used = 0;		/* [in]  TX bufs to be en/decrypted */
+	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
+	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
+	size_t processed = 0;		/* [in]  TX bufs to be consumed */
 
 	/*
-	 * Please see documentation of aead_request_set_crypt for the
-	 * description of the AEAD memory structure expected from the caller.
+	 * Data length provided by caller via sendmsg/sendpage that has not
+	 * yet been processed.
 	 */
-
-	if (ctx->more) {
-		err = aead_wait_for_data(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	/* data length provided by caller via sendmsg/sendpage */
 	used = ctx->used;
 
 	/*
@@ -584,8 +126,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 	 * the error message in sendmsg/sendpage and still call recvmsg. This
 	 * check here protects the kernel integrity.
 	 */
-	if (!aead_sufficient_data(ctx))
-		goto unlock;
+	if (!aead_sufficient_data(sk))
+		return -EINVAL;
 
 	/*
 	 * Calculate the minimum output buffer size holding the result of the
@@ -606,104 +148,191 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 	 */
 	used -= ctx->aead_assoclen;
 
-	/* convert iovecs of output buffers into scatterlists */
-	while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-		size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-				      (outlen - usedpages));
-
-		if (list_empty(&ctx->list)) {
-			rsgl = &ctx->first_rsgl;
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (unlikely(!rsgl)) {
-				err = -ENOMEM;
-				goto unlock;
-			}
-		}
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &ctx->list);
+	/* Allocate cipher request for current operation. */
+	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+				     crypto_aead_reqsize(tfm));
+	if (IS_ERR(areq))
+		return PTR_ERR(areq);
 
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto unlock;
-		usedpages += err;
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
+	/* convert iovecs of output buffers into RX SGL */
+	err = af_alg_get_rsgl(sk, msg, flags, areq, outlen, &usedpages);
+	if (err)
+		goto free;
 
-		last_rsgl = rsgl;
+	/*
+	 * Ensure output buffer is sufficiently large. If the caller provides
+	 * less buffer space, only use the relative required input size. This
+	 * allows AIO operation where the caller sent all data to be processed
+	 * and the AIO operation performs the operation on the different chunks
+	 * of the input data.
+	 */
+	if (usedpages < outlen) {
+		size_t less = outlen - usedpages;
 
-		iov_iter_advance(&msg->msg_iter, err);
+		if (used < less) {
+			err = -EINVAL;
+			goto free;
+		}
+		used -= less;
+		outlen -= less;
 	}
 
-	/* ensure output buffer is sufficiently large */
-	if (usedpages < outlen) {
-		err = -EINVAL;
-		goto unlock;
-	}
+	processed = used + ctx->aead_assoclen;
+	tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list);
 
 
-	sg_mark_end(sgl->sg + sgl->cur - 1);
-	aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->first_rsgl.sgl.sg,
-			       used, ctx->iv);
-	aead_request_set_ad(&ctx->aead_req, ctx->aead_assoclen);
+	/*
+	 * Copy of AAD from source to destination
+	 *
+	 * The AAD is copied to the destination buffer without change. Even
+	 * when user space uses an in-place cipher operation, the kernel
+	 * will copy the data as it does not see whether such in-place operation
+	 * is initiated.
+	 *
+	 * To ensure efficiency, the following implementation ensure that the
+	 * ciphers are invoked to perform a crypto operation in-place. This
+	 * is achieved by memory management specified as follows.
+	 */
 
-	err = af_alg_wait_for_completion(ctx->enc ?
-					 crypto_aead_encrypt(&ctx->aead_req) :
-					 crypto_aead_decrypt(&ctx->aead_req),
-					 &ctx->completion);
+	/* Use the RX SGL as source (and destination) for crypto op. */
+	src = areq->first_rsgl.sgl.sg;
+
+	if (ctx->enc) {
+		/*
+		 * Encryption operation - The in-place cipher operation is
+		 * achieved by the following operation:
+		 *
+		 * TX SGL: AAD || PT
+		 *	    |	   |
+		 *	    | copy |
+		 *	    v	   v
+		 * RX SGL: AAD || PT || Tag
+		 */
+		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+					   areq->first_rsgl.sgl.sg, processed);
+		if (err)
+			goto free;
+		af_alg_pull_tsgl(sk, processed, NULL, 0);
+	} else {
+		/*
+		 * Decryption operation - To achieve an in-place cipher
+		 * operation, the following  SGL structure is used:
+		 *
+		 * TX SGL: AAD || CT || Tag
+		 *	    |	   |	 ^
+		 *	    | copy |	 | Create SGL link.
+		 *	    v	   v	 |
+		 * RX SGL: AAD || CT ----+
+		 */
+
+		 /* Copy AAD || CT to RX SGL buffer for in-place operation. */
+		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+					   areq->first_rsgl.sgl.sg, outlen);
+		if (err)
+			goto free;
 
-	if (err) {
-		/* EBADMSG implies a valid cipher operation took place */
-		if (err == -EBADMSG)
-			aead_put_sgl(sk);
+		/* Create TX SGL for tag and chain it to RX SGL. */
+		areq->tsgl_entries = af_alg_count_tsgl(sk, processed,
+						       processed - as);
+		if (!areq->tsgl_entries)
+			areq->tsgl_entries = 1;
+		areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) *
+					      areq->tsgl_entries,
+					  GFP_KERNEL);
+		if (!areq->tsgl) {
+			err = -ENOMEM;
+			goto free;
+		}
+		sg_init_table(areq->tsgl, areq->tsgl_entries);
+
+		/* Release TX SGL, except for tag data and reassign tag data. */
+		af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as);
+
+		/* chain the areq TX SGL holding the tag with RX SGL */
+		if (usedpages) {
+			/* RX SGL present */
+			struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
+
+			sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
+			sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
+				 areq->tsgl);
+		} else
+			/* no RX SGL present (e.g. authentication only) */
+			src = areq->tsgl;
+	}
 
-		goto unlock;
+	/* Initialize the crypto operation */
+	aead_request_set_crypt(&areq->cra_u.aead_req, src,
+			       areq->first_rsgl.sgl.sg, used, ctx->iv);
+	aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
+	aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
+
+	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+		/* AIO operation */
+		areq->iocb = msg->msg_iocb;
+		aead_request_set_callback(&areq->cra_u.aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  af_alg_async_cb, areq);
+		err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
+				 crypto_aead_decrypt(&areq->cra_u.aead_req);
+	} else {
+		/* Synchronous operation */
+		aead_request_set_callback(&areq->cra_u.aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  af_alg_complete, &ctx->completion);
+		err = af_alg_wait_for_completion(ctx->enc ?
+				crypto_aead_encrypt(&areq->cra_u.aead_req) :
+				crypto_aead_decrypt(&areq->cra_u.aead_req),
+						 &ctx->completion);
 	}
 	}
 
-	err = 0;
+	/* AIO operation in progress */
+	if (err == -EINPROGRESS) {
+		sock_hold(sk);
 
 
-unlock:
-	list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		list_del(&rsgl->list);
-		if (rsgl != &ctx->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+		/* Remember output size that will be generated. */
+		areq->outlen = outlen;
+
+		return -EIOCBQUEUED;
 	}
-	INIT_LIST_HEAD(&ctx->list);
-	aead_wmem_wakeup(sk);
-	release_sock(sk);
 
-	return err ? err : outlen;
-}
+free:
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
 
-static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
-			int flags)
-{
-	return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-		aead_recvmsg_async(sock, msg, flags) :
-		aead_recvmsg_sync(sock, msg, flags);
+	return err ? err : outlen;
 }
 
-static unsigned int aead_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
+static int aead_recvmsg(struct socket *sock, struct msghdr *msg,
+			size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned int mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	int ret = 0;
 
-	if (!ctx->more)
-		mask |= POLLIN | POLLRDNORM;
+	lock_sock(sk);
+	while (msg_data_left(msg)) {
+		int err = _aead_recvmsg(sock, msg, ignored, flags);
+
+		/*
+		 * This error covers -EIOCBQUEUED which implies that we can
+		 * only handle one AIO request. If the caller wants to have
+		 * multiple AIO requests in parallel, he must make multiple
+		 * separate AIO calls.
+		 *
+		 * Also return the error if no data has been processed so far.
+		 */
+		if (err <= 0) {
+			if (err == -EIOCBQUEUED || err == -EBADMSG || !ret)
+				ret = err;
+			goto out;
+		}
 
-	if (aead_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		ret += err;
+	}
 
-	return mask;
+out:
+	af_alg_wmem_wakeup(sk);
+	release_sock(sk);
+	return ret;
 }
 
 static struct proto_ops algif_aead_ops = {
@@ -723,9 +352,9 @@ static struct proto_ops algif_aead_ops = {
 
 	.release	=	af_alg_release,
 	.sendmsg	=	aead_sendmsg,
-	.sendpage	=	aead_sendpage,
+	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	aead_recvmsg,
-	.poll		=	aead_poll,
+	.poll		=	af_alg_poll,
 };
 
 static int aead_check_key(struct socket *sock)
@@ -787,7 +416,7 @@ static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
 	if (err)
 		return err;
 
-	return aead_sendpage(sock, page, offset, size, flags);
+	return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -821,13 +450,14 @@ static struct proto_ops algif_aead_ops_nokey = {
 	.sendmsg	=	aead_sendmsg_nokey,
 	.sendpage	=	aead_sendpage_nokey,
 	.recvmsg	=	aead_recvmsg_nokey,
-	.poll		=	aead_poll,
+	.poll		=	af_alg_poll,
 };
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
 {
 	struct aead_tfm *tfm;
 	struct crypto_aead *aead;
+	struct crypto_skcipher *null_tfm;
 
 	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
 	if (!tfm)
@@ -839,7 +469,15 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 		return ERR_CAST(aead);
 	}
 
+	null_tfm = crypto_get_default_null_skcipher2();
+	if (IS_ERR(null_tfm)) {
+		crypto_free_aead(aead);
+		kfree(tfm);
+		return ERR_CAST(null_tfm);
+	}
+
 	tfm->aead = aead;
+	tfm->null_tfm = null_tfm;
 
 	return tfm;
 }
@@ -873,12 +511,15 @@ static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
 static void aead_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned int ivlen = crypto_aead_ivsize(
-				crypto_aead_reqtfm(&ctx->aead_req));
-
-	WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
-	aead_put_sgl(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int ivlen = crypto_aead_ivsize(tfm);
+
+	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
+	crypto_put_default_null_skcipher2();
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -886,11 +527,11 @@ static void aead_sock_destruct(struct sock *sk)
 
 static int aead_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct aead_ctx *ctx;
+	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_tfm *tfm = private;
 	struct crypto_aead *aead = tfm->aead;
-	unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(aead);
+	unsigned int len = sizeof(*ctx);
 	unsigned int ivlen = crypto_aead_ivsize(aead);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
@@ -905,23 +546,18 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
 	}
 	memset(ctx->iv, 0, ivlen);
 
+	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
+	ctx->rcvused = 0;
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
-	ctx->tsgl.cur = 0;
 	ctx->aead_assoclen = 0;
 	af_alg_init_completion(&ctx->completion);
-	sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
-	INIT_LIST_HEAD(&ctx->list);
 
 	ask->private = ctx;
 
-	aead_request_set_tfm(&ctx->aead_req, aead);
-	aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  af_alg_complete, &ctx->completion);
-
 	sk->sk_destruct = aead_sock_destruct;
 
 	return 0;
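
The reworked algif_aead keeps the existing user-space contract: bind an "aead" socket, submit AAD plus plaintext (or ciphertext and tag) with sendmsg(), and collect the result with read()/recvmsg(). A minimal, error-handling-free sketch of that flow from user space, assuming AES-GCM and the constants from linux/if_alg.h (illustrative only, not part of this diff):

#include <linux/if_alg.h>
#include <sys/socket.h>
#include <string.h>
#include <unistd.h>

int aead_encrypt_sketch(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",
		.salg_name   = "gcm(aes)",
	};
	unsigned char key[16] = { 0 };
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	opfd = accept(tfmfd, NULL, 0);

	/* One sendmsg() carries the control data (ALG_SET_OP = ALG_OP_ENCRYPT,
	 * ALG_SET_IV, ALG_SET_AEAD_ASSOCLEN) as cmsgs plus an iovec over
	 * AAD || PT; a following read()/recvmsg() returns AAD || CT || tag.
	 */
	/* ... build the msghdr with the cmsgs and data iovec, sendmsg(),
	 *     then read() the result buffer ...
	 */

	close(opfd);
	close(tfmfd);
	return 0;
}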

+ 132 - 702
crypto/algif_skcipher.c

@@ -10,6 +10,21 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  * any later version.
  *
  *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request is released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
  */
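
For readers who want to see the TX SGL / RX SGL split described above from the user-space side, the following minimal sketch drives an algif_skcipher socket: sendmsg() only queues data into the TX SGL, and the cipher request is built and executed when the read() buffer (the RX SGL) is supplied. This is an illustration only, not part of the patch; the algorithm name, key and data are arbitrary example values and error handling is omitted.

    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <linux/if_alg.h>

    #ifndef SOL_ALG
    #define SOL_ALG 279
    #endif

    int main(void)
    {
        struct sockaddr_alg sa = {
            .salg_family = AF_ALG,
            .salg_type   = "skcipher",
            .salg_name   = "ecb(aes)",          /* example algorithm */
        };
        uint8_t key[16] = { 0 };                /* example all-zero key */
        uint8_t pt[16] = "0123456789abcde", ct[16];
        char cbuf[CMSG_SPACE(sizeof(uint32_t))] = { 0 };
        struct iovec iov = { .iov_base = pt, .iov_len = sizeof(pt) };
        struct msghdr msg = {
            .msg_control    = cbuf,
            .msg_controllen = sizeof(cbuf),
            .msg_iov        = &iov,
            .msg_iovlen     = 1,
        };
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
        int tfmfd, opfd;

        /* Request encryption for the data that follows. */
        cmsg->cmsg_level = SOL_ALG;
        cmsg->cmsg_type  = ALG_SET_OP;
        cmsg->cmsg_len   = CMSG_LEN(sizeof(uint32_t));
        *(uint32_t *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

        tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
        bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
        setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
        opfd = accept(tfmfd, NULL, 0);

        sendmsg(opfd, &msg, 0);      /* fills the TX SGL, no crypto yet */
        read(opfd, ct, sizeof(ct));  /* RX buffer; the cipher runs here */

        close(opfd);
        close(tfmfd);
        return 0;
    }
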
 
 
 #include <crypto/scatterwalk.h>
 #include <crypto/scatterwalk.h>
@@ -18,284 +33,16 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/net.h>
 #include <net/sock.h>
 #include <net/sock.h>
 
 
-struct skcipher_sg_list {
-	struct list_head list;
-
-	int cur;
-
-	struct scatterlist sg[0];
-};
-
 struct skcipher_tfm {
 struct skcipher_tfm {
 	struct crypto_skcipher *skcipher;
 	struct crypto_skcipher *skcipher;
 	bool has_key;
 	bool has_key;
 };
 };
 
 
-struct skcipher_ctx {
-	struct list_head tsgl;
-	struct af_alg_sgl rsgl;
-
-	void *iv;
-
-	struct af_alg_completion completion;
-
-	atomic_t inflight;
-	size_t used;
-
-	unsigned int len;
-	bool more;
-	bool merge;
-	bool enc;
-
-	struct skcipher_request req;
-};
-
-struct skcipher_async_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-};
-
-struct skcipher_async_req {
-	struct kiocb *iocb;
-	struct skcipher_async_rsgl first_sgl;
-	struct list_head list;
-	struct scatterlist *tsg;
-	atomic_t *inflight;
-	struct skcipher_request req;
-};
-
-#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
-		      sizeof(struct scatterlist) - 1)
-
-static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
-{
-	struct skcipher_async_rsgl *rsgl, *tmp;
-	struct scatterlist *sgl;
-	struct scatterlist *sg;
-	int i, n;
-
-	list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &sreq->first_sgl)
-			kfree(rsgl);
-	}
-	sgl = sreq->tsg;
-	n = sg_nents(sgl);
-	for_each_sg(sgl, sg, n, i) {
-		struct page *page = sg_page(sg);
-
-		/* some SGs may not have a page mapped */
-		if (page && page_ref_count(page))
-			put_page(page);
-	}
-
-	kfree(sreq->tsg);
-}
-
-static void skcipher_async_cb(struct crypto_async_request *req, int err)
-{
-	struct skcipher_async_req *sreq = req->data;
-	struct kiocb *iocb = sreq->iocb;
-
-	atomic_dec(sreq->inflight);
-	skcipher_free_async_sgls(sreq);
-	kzfree(sreq);
-	iocb->ki_complete(iocb, err, err);
-}
-
-static inline int skcipher_sndbuf(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
-}
-
-static inline bool skcipher_writable(struct sock *sk)
-{
-	return PAGE_SIZE <= skcipher_sndbuf(sk);
-}
-
-static int skcipher_alloc_sgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg = NULL;
-
-	sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-	if (!list_empty(&ctx->tsgl))
-		sg = sgl->sg;
-
-	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
-		sgl = sock_kmalloc(sk, sizeof(*sgl) +
-				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
-				   GFP_KERNEL);
-		if (!sgl)
-			return -ENOMEM;
-
-		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
-		sgl->cur = 0;
-
-		if (sg)
-			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
-
-		list_add_tail(&sgl->list, &ctx->tsgl);
-	}
-
-	return 0;
-}
-
-static void skcipher_pull_sgl(struct sock *sk, size_t used, int put)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int i;
-
-	while (!list_empty(&ctx->tsgl)) {
-		sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list,
-				       list);
-		sg = sgl->sg;
-
-		for (i = 0; i < sgl->cur; i++) {
-			size_t plen = min_t(size_t, used, sg[i].length);
-
-			if (!sg_page(sg + i))
-				continue;
-
-			sg[i].length -= plen;
-			sg[i].offset += plen;
-
-			used -= plen;
-			ctx->used -= plen;
-
-			if (sg[i].length)
-				return;
-			if (put)
-				put_page(sg_page(sg + i));
-			sg_assign_page(sg + i, NULL);
-		}
-
-		list_del(&sgl->list);
-		sock_kfree_s(sk, sgl,
-			     sizeof(*sgl) + sizeof(sgl->sg[0]) *
-					    (MAX_SGL_ENTS + 1));
-	}
-
-	if (!ctx->used)
-		ctx->merge = 0;
-}
-
-static void skcipher_free_sgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-
-	skcipher_pull_sgl(sk, ctx->used, 1);
-}
-
-static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int err = -ERESTARTSYS;
-	long timeout;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	return err;
-}
-
-static void skcipher_wmem_wakeup(struct sock *sk)
-{
-	struct socket_wq *wq;
-
-	if (!skcipher_writable(sk))
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	rcu_read_unlock();
-}
-
-static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	long timeout;
-	int err = -ERESTARTSYS;
-
-	if (flags & MSG_DONTWAIT) {
-		return -EAGAIN;
-	}
-
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, ctx->used, &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	return err;
-}
-
-static void skcipher_data_wakeup(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct socket_wq *wq;
-
-	if (!ctx->used)
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	rcu_read_unlock();
-}
-
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t size)
 			    size_t size)
 {
 {
@@ -303,445 +50,143 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
-	struct skcipher_sg_list *sgl;
-	struct af_alg_control con = {};
-	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
-	int err;
-	int i;
-
-	if (msg->msg_controllen) {
-		err = af_alg_cmsg_send(msg, &con);
-		if (err)
-			return err;
-
-		init = 1;
-		switch (con.op) {
-		case ALG_OP_ENCRYPT:
-			enc = 1;
-			break;
-		case ALG_OP_DECRYPT:
-			enc = 0;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (con.iv && con.iv->ivlen != ivsize)
-			return -EINVAL;
-	}
-
-	err = -EINVAL;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (init) {
-		ctx->enc = enc;
-		if (con.iv)
-			memcpy(ctx->iv, con.iv->iv, ivsize);
-	}
-
-	while (size) {
-		struct scatterlist *sg;
-		unsigned long len = size;
-		size_t plen;
-
-		if (ctx->merge) {
-			sgl = list_entry(ctx->tsgl.prev,
-					 struct skcipher_sg_list, list);
-			sg = sgl->sg + sgl->cur - 1;
-			len = min_t(unsigned long, len,
-				    PAGE_SIZE - sg->offset - sg->length);
-
-			err = memcpy_from_msg(page_address(sg_page(sg)) +
-					      sg->offset + sg->length,
-					      msg, len);
-			if (err)
-				goto unlock;
-
-			sg->length += len;
-			ctx->merge = (sg->offset + sg->length) &
-				     (PAGE_SIZE - 1);
-
-			ctx->used += len;
-			copied += len;
-			size -= len;
-			continue;
-		}
 
 
-		if (!skcipher_writable(sk)) {
-			err = skcipher_wait_for_wmem(sk, msg->msg_flags);
-			if (err)
-				goto unlock;
-		}
-
-		len = min_t(unsigned long, len, skcipher_sndbuf(sk));
-
-		err = skcipher_alloc_sgl(sk);
-		if (err)
-			goto unlock;
-
-		sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-		sg = sgl->sg;
-		if (sgl->cur)
-			sg_unmark_end(sg + sgl->cur - 1);
-		do {
-			i = sgl->cur;
-			plen = min_t(size_t, len, PAGE_SIZE);
-
-			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
-			err = -ENOMEM;
-			if (!sg_page(sg + i))
-				goto unlock;
-
-			err = memcpy_from_msg(page_address(sg_page(sg + i)),
-					      msg, plen);
-			if (err) {
-				__free_page(sg_page(sg + i));
-				sg_assign_page(sg + i, NULL);
-				goto unlock;
-			}
-
-			sg[i].length = plen;
-			len -= plen;
-			ctx->used += plen;
-			copied += plen;
-			size -= plen;
-			sgl->cur++;
-		} while (len && sgl->cur < MAX_SGL_ENTS);
-
-		if (!size)
-			sg_mark_end(sg + sgl->cur - 1);
-
-		ctx->merge = plen & (PAGE_SIZE - 1);
-	}
-
-	err = 0;
-
-	ctx->more = msg->msg_flags & MSG_MORE;
-
-unlock:
-	skcipher_data_wakeup(sk);
-	release_sock(sk);
-
-	return copied ?: err;
+	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 }
 
 
-static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
-				 int offset, size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
-	int err = -EINVAL;
-
-	if (flags & MSG_SENDPAGE_NOTLAST)
-		flags |= MSG_MORE;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (!size)
-		goto done;
-
-	if (!skcipher_writable(sk)) {
-		err = skcipher_wait_for_wmem(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	err = skcipher_alloc_sgl(sk);
-	if (err)
-		goto unlock;
-
-	ctx->merge = 0;
-	sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-
-	if (sgl->cur)
-		sg_unmark_end(sgl->sg + sgl->cur - 1);
-
-	sg_mark_end(sgl->sg + sgl->cur);
-	get_page(page);
-	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-	sgl->cur++;
-	ctx->used += size;
-
-done:
-	ctx->more = flags & MSG_MORE;
-
-unlock:
-	skcipher_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: size;
-}
-
-static int skcipher_all_sg_nents(struct skcipher_ctx *ctx)
-{
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int nents = 0;
-
-	list_for_each_entry(sgl, &ctx->tsgl, list) {
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		nents += sg_nents(sg);
-	}
-	return nents;
-}
-
-static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
-				  int flags)
+static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			     size_t ignored, int flags)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	struct crypto_skcipher *tfm = skc->skcipher;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	struct skcipher_async_req *sreq;
-	struct skcipher_request *req;
-	struct skcipher_async_rsgl *last_rsgl = NULL;
-	unsigned int txbufs = 0, len = 0, tx_nents;
-	unsigned int reqsize = crypto_skcipher_reqsize(tfm);
-	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
-	int err = -ENOMEM;
-	bool mark = false;
-	char *iv;
-
-	sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
-	if (unlikely(!sreq))
-		goto out;
-
-	req = &sreq->req;
-	iv = (char *)(req + 1) + reqsize;
-	sreq->iocb = msg->msg_iocb;
-	INIT_LIST_HEAD(&sreq->list);
-	sreq->inflight = &ctx->inflight;
+	unsigned int bs = crypto_skcipher_blocksize(tfm);
+	struct af_alg_async_req *areq;
+	int err = 0;
+	size_t len = 0;
 
 
-	lock_sock(sk);
-	tx_nents = skcipher_all_sg_nents(ctx);
-	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-	if (unlikely(!sreq->tsg))
-		goto unlock;
-	sg_init_table(sreq->tsg, tx_nents);
-	memcpy(iv, ctx->iv, ivsize);
-	skcipher_request_set_tfm(req, tfm);
-	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
-				      skcipher_async_cb, sreq);
-
-	while (iov_iter_count(&msg->msg_iter)) {
-		struct skcipher_async_rsgl *rsgl;
-		int used;
-
-		if (!ctx->used) {
-			err = skcipher_wait_for_data(sk, flags);
-			if (err)
-				goto free;
-		}
-		sgl = list_first_entry(&ctx->tsgl,
-				       struct skcipher_sg_list, list);
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		used = min_t(unsigned long, ctx->used,
-			     iov_iter_count(&msg->msg_iter));
-		used = min_t(unsigned long, used, sg->length);
-
-		if (txbufs == tx_nents) {
-			struct scatterlist *tmp;
-			int x;
-			/* Ran out of tx slots in async request
-			 * need to expand */
-			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
-				      GFP_KERNEL);
-			if (!tmp) {
-				err = -ENOMEM;
-				goto free;
-			}
-
-			sg_init_table(tmp, tx_nents * 2);
-			for (x = 0; x < tx_nents; x++)
-				sg_set_page(&tmp[x], sg_page(&sreq->tsg[x]),
-					    sreq->tsg[x].length,
-					    sreq->tsg[x].offset);
-			kfree(sreq->tsg);
-			sreq->tsg = tmp;
-			tx_nents *= 2;
-			mark = true;
-		}
-		/* Need to take over the tx sgl from ctx
-		 * to the asynch req - these sgls will be freed later */
-		sg_set_page(sreq->tsg + txbufs++, sg_page(sg), sg->length,
-			    sg->offset);
-
-		if (list_empty(&sreq->list)) {
-			rsgl = &sreq->first_sgl;
-			list_add_tail(&rsgl->list, &sreq->list);
-		} else {
-			rsgl = kmalloc(sizeof(*rsgl), GFP_KERNEL);
-			if (!rsgl) {
-				err = -ENOMEM;
-				goto free;
-			}
-			list_add_tail(&rsgl->list, &sreq->list);
-		}
+	/* Allocate cipher request for current operation. */
+	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+				     crypto_skcipher_reqsize(tfm));
+	if (IS_ERR(areq))
+		return PTR_ERR(areq);
 
 
-		used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used);
-		err = used;
-		if (used < 0)
-			goto free;
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
-		len += used;
-		skcipher_pull_sgl(sk, used, 0);
-		iov_iter_advance(&msg->msg_iter, used);
+	/* convert iovecs of output buffers into RX SGL */
+	err = af_alg_get_rsgl(sk, msg, flags, areq, -1, &len);
+	if (err)
+		goto free;
+
+	/* Process only as much RX buffers for which we have TX data */
+	if (len > ctx->used)
+		len = ctx->used;
+
+	/*
+	 * If more buffers are to be expected to be processed, process only
+	 * full block size buffers.
+	 */
+	if (ctx->more || len < ctx->used)
+		len -= len % bs;
+
+	/*
+	 * Create a per request TX SGL for this request which tracks the
+	 * SG entries from the global TX SGL.
+	 */
+	areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0);
+	if (!areq->tsgl_entries)
+		areq->tsgl_entries = 1;
+	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
+				  GFP_KERNEL);
+	if (!areq->tsgl) {
+		err = -ENOMEM;
+		goto free;
+	}
+	sg_init_table(areq->tsgl, areq->tsgl_entries);
+	af_alg_pull_tsgl(sk, len, areq->tsgl, 0);
+
+	/* Initialize the crypto operation */
+	skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
+	skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
+				   areq->first_rsgl.sgl.sg, len, ctx->iv);
+
+	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+		/* AIO operation */
+		areq->iocb = msg->msg_iocb;
+		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+					      CRYPTO_TFM_REQ_MAY_SLEEP,
+					      af_alg_async_cb, areq);
+		err = ctx->enc ?
+			crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
+	} else {
+		/* Synchronous operation */
+		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+					      CRYPTO_TFM_REQ_MAY_SLEEP |
+					      CRYPTO_TFM_REQ_MAY_BACKLOG,
+					      af_alg_complete,
+					      &ctx->completion);
+		err = af_alg_wait_for_completion(ctx->enc ?
+			crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req),
+						 &ctx->completion);
 	}
 	}
 
 
-	if (mark)
-		sg_mark_end(sreq->tsg + txbufs - 1);
-
-	skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-				   len, iv);
-	err = ctx->enc ? crypto_skcipher_encrypt(req) :
-			 crypto_skcipher_decrypt(req);
+	/* AIO operation in progress */
 	if (err == -EINPROGRESS) {
 	if (err == -EINPROGRESS) {
-		atomic_inc(&ctx->inflight);
-		err = -EIOCBQUEUED;
-		sreq = NULL;
-		goto unlock;
+		sock_hold(sk);
+
+		/* Remember output size that will be generated. */
+		areq->outlen = len;
+
+		return -EIOCBQUEUED;
 	}
 	}
+
 free:
 free:
-	skcipher_free_async_sgls(sreq);
-unlock:
-	skcipher_wmem_wakeup(sk);
-	release_sock(sk);
-	kzfree(sreq);
-out:
-	return err;
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+
+	return err ? err : len;
 }
 }
 
 
-static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
-				 int flags)
+static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t ignored, int flags)
 {
 {
 	struct sock *sk = sock->sk;
 	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct sock *psk = ask->parent;
-	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
-	unsigned bs = crypto_skcipher_blocksize(tfm);
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int err = -EAGAIN;
-	int used;
-	long copied = 0;
+	int ret = 0;
 
 
 	lock_sock(sk);
 	lock_sock(sk);
 	while (msg_data_left(msg)) {
 	while (msg_data_left(msg)) {
-		if (!ctx->used) {
-			err = skcipher_wait_for_data(sk, flags);
-			if (err)
-				goto unlock;
+		int err = _skcipher_recvmsg(sock, msg, ignored, flags);
+
+		/*
+		 * This error covers -EIOCBQUEUED which implies that we can
+		 * only handle one AIO request. If the caller wants to have
+		 * multiple AIO requests in parallel, he must make multiple
+		 * separate AIO calls.
+		 *
+		 * Also return the error if no data has been processed so far.
+		 */
+		if (err <= 0) {
+			if (err == -EIOCBQUEUED || !ret)
+				ret = err;
+			goto out;
 		}
 		}
 
 
-		used = min_t(unsigned long, ctx->used, msg_data_left(msg));
-
-		used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
-		err = used;
-		if (err < 0)
-			goto unlock;
-
-		if (ctx->more || used < ctx->used)
-			used -= used % bs;
-
-		err = -EINVAL;
-		if (!used)
-			goto free;
-
-		sgl = list_first_entry(&ctx->tsgl,
-				       struct skcipher_sg_list, list);
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
-					   ctx->iv);
-
-		err = af_alg_wait_for_completion(
-				ctx->enc ?
-					crypto_skcipher_encrypt(&ctx->req) :
-					crypto_skcipher_decrypt(&ctx->req),
-				&ctx->completion);
-
-free:
-		af_alg_free_sg(&ctx->rsgl);
-
-		if (err)
-			goto unlock;
-
-		copied += used;
-		skcipher_pull_sgl(sk, used, 1);
-		iov_iter_advance(&msg->msg_iter, used);
+		ret += err;
 	}
 	}
 
 
-	err = 0;
-
-unlock:
-	skcipher_wmem_wakeup(sk);
+out:
+	af_alg_wmem_wakeup(sk);
 	release_sock(sk);
 	release_sock(sk);
-
-	return copied ?: err;
-}
-
-static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
-			    size_t ignored, int flags)
-{
-	return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-		skcipher_recvmsg_async(sock, msg, flags) :
-		skcipher_recvmsg_sync(sock, msg, flags);
+	return ret;
 }
 }
 
 
-static unsigned int skcipher_poll(struct file *file, struct socket *sock,
-				  poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	unsigned int mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	if (ctx->used)
-		mask |= POLLIN | POLLRDNORM;
-
-	if (skcipher_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-
-	return mask;
-}
 
 
 static struct proto_ops algif_skcipher_ops = {
 static struct proto_ops algif_skcipher_ops = {
 	.family		=	PF_ALG,
 	.family		=	PF_ALG,
@@ -760,9 +205,9 @@ static struct proto_ops algif_skcipher_ops = {
 
 
 	.release	=	af_alg_release,
 	.release	=	af_alg_release,
 	.sendmsg	=	skcipher_sendmsg,
 	.sendmsg	=	skcipher_sendmsg,
-	.sendpage	=	skcipher_sendpage,
+	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	skcipher_recvmsg,
 	.recvmsg	=	skcipher_recvmsg,
-	.poll		=	skcipher_poll,
+	.poll		=	af_alg_poll,
 };
 };
 
 
 static int skcipher_check_key(struct socket *sock)
 static int skcipher_check_key(struct socket *sock)
@@ -824,7 +269,7 @@ static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
 	if (err)
 	if (err)
 		return err;
 		return err;
 
 
-	return skcipher_sendpage(sock, page, offset, size, flags);
+	return af_alg_sendpage(sock, page, offset, size, flags);
 }
 }
 
 
 static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
 static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -858,7 +303,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
-	.poll		=	skcipher_poll,
+	.poll		=	af_alg_poll,
 };
 };
 
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
@@ -900,26 +345,16 @@ static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 	return err;
 	return err;
 }
 }
 
 
-static void skcipher_wait(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	int ctr = 0;
-
-	while (atomic_read(&ctx->inflight) && ctr++ < 100)
-		msleep(100);
-}
-
 static void skcipher_sock_destruct(struct sock *sk)
 static void skcipher_sock_destruct(struct sock *sk)
 {
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
-
-	if (atomic_read(&ctx->inflight))
-		skcipher_wait(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct skcipher_tfm *skc = pask->private;
+	struct crypto_skcipher *tfm = skc->skcipher;
 
 
-	skcipher_free_sgl(sk);
+	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
 	sock_kfree_s(sk, ctx, ctx->len);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
 	af_alg_release_parent(sk);
@@ -927,11 +362,11 @@ static void skcipher_sock_destruct(struct sock *sk)
 
 
 static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
 {
-	struct skcipher_ctx *ctx;
+	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_tfm *tfm = private;
 	struct skcipher_tfm *tfm = private;
 	struct crypto_skcipher *skcipher = tfm->skcipher;
 	struct crypto_skcipher *skcipher = tfm->skcipher;
-	unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
+	unsigned int len = sizeof(*ctx);
 
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
 	if (!ctx)
@@ -946,22 +381,17 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 
 
 	memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 	memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
 
-	INIT_LIST_HEAD(&ctx->tsgl);
+	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->len = len;
 	ctx->used = 0;
 	ctx->used = 0;
+	ctx->rcvused = 0;
 	ctx->more = 0;
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
 	ctx->enc = 0;
-	atomic_set(&ctx->inflight, 0);
 	af_alg_init_completion(&ctx->completion);
 	af_alg_init_completion(&ctx->completion);
 
 
 	ask->private = ctx;
 	ask->private = ctx;
 
 
-	skcipher_request_set_tfm(&ctx->req, skcipher);
-	skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
-						 CRYPTO_TFM_REQ_MAY_BACKLOG,
-				      af_alg_complete, &ctx->completion);
-
 	sk->sk_destruct = skcipher_sock_destruct;
 	sk->sk_destruct = skcipher_sock_destruct;
 
 
 	return 0;
 	return 0;

+ 1 - 2
crypto/ctr.c

@@ -65,8 +65,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
 	unsigned int nbytes = walk->nbytes;
 	unsigned int nbytes = walk->nbytes;
 
 
 	crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
 	crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 
 	crypto_inc(ctrblk, bsize);
 	crypto_inc(ctrblk, bsize);
 }
 }
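
This hunk (and the pcbc.c hunks further down) switches to the new crypto_xor_cpy() helper, which fuses the crypto_xor()-then-memcpy() pair into a single pass that writes dst = src1 ^ src2 without modifying either source buffer. The sketch below is only a simplified model of those semantics (xor_cpy_model is not a kernel function); the real helper may additionally process machine-word-sized chunks when the buffers allow it.

    /*
     * Simplified model of crypto_xor_cpy(): dst = src1 ^ src2, byte by byte.
     * u8 is the kernel's byte type from <linux/types.h>.
     */
    static inline void xor_cpy_model(u8 *dst, const u8 *src1, const u8 *src2,
                                     unsigned int size)
    {
        while (size--)
            *dst++ = *src1++ ^ *src2++;
    }

For the CTR finalisation above, this means the partial keystream block is XORed with the source and written straight to the destination, instead of building the result in the keystream buffer and then copying it out.
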

+ 33 - 18
crypto/ecdh.c

@@ -20,8 +20,6 @@ struct ecdh_ctx {
 	unsigned int curve_id;
 	unsigned int curve_id;
 	unsigned int ndigits;
 	unsigned int ndigits;
 	u64 private_key[ECC_MAX_DIGITS];
 	u64 private_key[ECC_MAX_DIGITS];
-	u64 public_key[2 * ECC_MAX_DIGITS];
-	u64 shared_secret[ECC_MAX_DIGITS];
 };
 };
 
 
 static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
 static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
@@ -70,41 +68,58 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 
 
 static int ecdh_compute_value(struct kpp_request *req)
 static int ecdh_compute_value(struct kpp_request *req)
 {
 {
-	int ret = 0;
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
-	size_t copied, nbytes;
+	u64 *public_key;
+	u64 *shared_secret = NULL;
 	void *buf;
 	void *buf;
+	size_t copied, nbytes, public_key_sz;
+	int ret = -ENOMEM;
 
 
 	nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
 	nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	/* Public part is a point thus it has both coordinates */
+	public_key_sz = 2 * nbytes;
+
+	public_key = kmalloc(public_key_sz, GFP_KERNEL);
+	if (!public_key)
+		return -ENOMEM;
 
 
 	if (req->src) {
 	if (req->src) {
-		copied = sg_copy_to_buffer(req->src, 1, ctx->public_key,
-					   2 * nbytes);
-		if (copied != 2 * nbytes)
-			return -EINVAL;
+		shared_secret = kmalloc(nbytes, GFP_KERNEL);
+		if (!shared_secret)
+			goto free_pubkey;
+
+		copied = sg_copy_to_buffer(req->src, 1, public_key,
+					   public_key_sz);
+		if (copied != public_key_sz) {
+			ret = -EINVAL;
+			goto free_all;
+		}
 
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
-						ctx->private_key,
-						ctx->public_key,
-						ctx->shared_secret);
+						ctx->private_key, public_key,
+						shared_secret);
 
 
-		buf = ctx->shared_secret;
+		buf = shared_secret;
 	} else {
 	} else {
 		ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
 		ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
-				       ctx->private_key, ctx->public_key);
-		buf = ctx->public_key;
-		/* Public part is a point thus it has both coordinates */
-		nbytes *= 2;
+				       ctx->private_key, public_key);
+		buf = public_key;
+		nbytes = public_key_sz;
 	}
 	}
 
 
 	if (ret < 0)
 	if (ret < 0)
-		return ret;
+		goto free_all;
 
 
 	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
 	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
 	if (copied != nbytes)
 	if (copied != nbytes)
-		return -EINVAL;
+		ret = -EINVAL;
 
 
+	/* fall through */
+free_all:
+	kzfree(shared_secret);
+free_pubkey:
+	kfree(public_key);
 	return ret;
 	return ret;
 }
 }
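
The sizes allocated here follow directly from the curve parameters: the shared secret is a single field element of nbytes bytes, while the public key is a curve point and needs room for both coordinates. As a concrete, purely illustrative example for NIST P-256, where ctx->ndigits is 4:

    /* Illustration only: ECDH buffer sizes for NIST P-256 (ndigits = 4). */
    unsigned int nbytes        = 4 << 3;        /* 4 digits x 8 bytes = 32 */
    unsigned int public_key_sz = 2 * nbytes;    /* point (x, y)       = 64 */
    unsigned int secret_sz     = nbytes;        /* shared secret      = 32 */

Note also the asymmetric cleanup in the hunk: the shared secret is sensitive and is released with kzfree() so it is wiped before being freed, while the public key only needs a plain kfree().
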
 
 

+ 4 - 8
crypto/pcbc.c

@@ -55,8 +55,7 @@ static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
 	do {
 	do {
 		crypto_xor(iv, src, bsize);
 		crypto_xor(iv, src, bsize);
 		crypto_cipher_encrypt_one(tfm, dst, iv);
 		crypto_cipher_encrypt_one(tfm, dst, iv);
-		memcpy(iv, dst, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, dst, src, bsize);
 
 
 		src += bsize;
 		src += bsize;
 		dst += bsize;
 		dst += bsize;
@@ -79,8 +78,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
 		memcpy(tmpbuf, src, bsize);
 		memcpy(tmpbuf, src, bsize);
 		crypto_xor(iv, src, bsize);
 		crypto_xor(iv, src, bsize);
 		crypto_cipher_encrypt_one(tfm, src, iv);
 		crypto_cipher_encrypt_one(tfm, src, iv);
-		memcpy(iv, tmpbuf, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, tmpbuf, src, bsize);
 
 
 		src += bsize;
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);
 	} while ((nbytes -= bsize) >= bsize);
@@ -127,8 +125,7 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
 	do {
 	do {
 		crypto_cipher_decrypt_one(tfm, dst, src);
 		crypto_cipher_decrypt_one(tfm, dst, src);
 		crypto_xor(dst, iv, bsize);
 		crypto_xor(dst, iv, bsize);
-		memcpy(iv, src, bsize);
-		crypto_xor(iv, dst, bsize);
+		crypto_xor_cpy(iv, dst, src, bsize);
 
 
 		src += bsize;
 		src += bsize;
 		dst += bsize;
 		dst += bsize;
@@ -153,8 +150,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
 		memcpy(tmpbuf, src, bsize);
 		memcpy(tmpbuf, src, bsize);
 		crypto_cipher_decrypt_one(tfm, src, src);
 		crypto_cipher_decrypt_one(tfm, src, src);
 		crypto_xor(src, iv, bsize);
 		crypto_xor(src, iv, bsize);
-		memcpy(iv, tmpbuf, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, src, tmpbuf, bsize);
 
 
 		src += bsize;
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);
 	} while ((nbytes -= bsize) >= bsize);
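
For context on what the fused XOR computes in these four hunks: PCBC chains blocks as C_i = E_K(P_i xor V_i) with the next chaining value V_{i+1} = P_i xor C_i (V_1 comes from the IV), so after every block the chaining value must be rebuilt from that block's plaintext and ciphertext. crypto_xor_cpy() produces that P_i xor C_i value in one step instead of a memcpy() followed by crypto_xor(). Below is a hypothetical, stripped-down model of the encrypt loop, with a placeholder encrypt_block() standing in for crypto_cipher_encrypt_one():

    /* Placeholder for crypto_cipher_encrypt_one() in this sketch. */
    void encrypt_block(u8 *dst, const u8 *src);

    /* Hypothetical sketch of PCBC encryption over full blocks only. */
    static void pcbc_encrypt_model(u8 *dst, const u8 *src, u8 *iv,
                                   unsigned int nblocks, unsigned int bsize)
    {
        while (nblocks--) {
            crypto_xor(iv, src, bsize);             /* iv = V ^ P      */
            encrypt_block(dst, iv);                 /* C  = E_K(V ^ P) */
            crypto_xor_cpy(iv, dst, src, bsize);    /* V' = C ^ P      */
            src += bsize;
            dst += bsize;
        }
    }

Decryption mirrors this: P = D_K(C) xor V, followed by the same V' = P xor C update.
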

+ 4 - 2
crypto/rng.c

@@ -43,12 +43,14 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 		if (!buf)
 		if (!buf)
 			return -ENOMEM;
 			return -ENOMEM;
 
 
-		get_random_bytes(buf, slen);
+		err = get_random_bytes_wait(buf, slen);
+		if (err)
+			goto out;
 		seed = buf;
 		seed = buf;
 	}
 	}
 
 
 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
-
+out:
 	kzfree(buf);
 	kzfree(buf);
 	return err;
 	return err;
 }
 }

+ 22 - 33
crypto/scompress.c

@@ -65,11 +65,6 @@ static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
 	seq_puts(m, "type         : scomp\n");
 	seq_puts(m, "type         : scomp\n");
 }
 }
 
 
-static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
-{
-	return 0;
-}
-
 static void crypto_scomp_free_scratches(void * __percpu *scratches)
 static void crypto_scomp_free_scratches(void * __percpu *scratches)
 {
 {
 	int i;
 	int i;
@@ -125,12 +120,26 @@ static int crypto_scomp_alloc_all_scratches(void)
 		if (!scomp_src_scratches)
 		if (!scomp_src_scratches)
 			return -ENOMEM;
 			return -ENOMEM;
 		scomp_dst_scratches = crypto_scomp_alloc_scratches();
 		scomp_dst_scratches = crypto_scomp_alloc_scratches();
-		if (!scomp_dst_scratches)
+		if (!scomp_dst_scratches) {
+			crypto_scomp_free_scratches(scomp_src_scratches);
+			scomp_src_scratches = NULL;
 			return -ENOMEM;
 			return -ENOMEM;
+		}
 	}
 	}
 	return 0;
 	return 0;
 }
 }
 
 
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+	int ret;
+
+	mutex_lock(&scomp_lock);
+	ret = crypto_scomp_alloc_all_scratches();
+	mutex_unlock(&scomp_lock);
+
+	return ret;
+}
+
 static void crypto_scomp_sg_free(struct scatterlist *sgl)
 static void crypto_scomp_sg_free(struct scatterlist *sgl)
 {
 {
 	int i, n;
 	int i, n;
@@ -211,9 +220,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 					      scratch_dst, &req->dlen, *ctx);
 					      scratch_dst, &req->dlen, *ctx);
 	if (!ret) {
 	if (!ret) {
 		if (!req->dst) {
 		if (!req->dst) {
-			req->dst = crypto_scomp_sg_alloc(req->dlen,
-				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
-				   GFP_KERNEL : GFP_ATOMIC);
+			req->dst = crypto_scomp_sg_alloc(req->dlen, GFP_ATOMIC);
 			if (!req->dst)
 			if (!req->dst)
 				goto out;
 				goto out;
 		}
 		}
@@ -240,6 +247,10 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
 	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
 	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
 
 
 	crypto_free_scomp(*ctx);
 	crypto_free_scomp(*ctx);
+
+	mutex_lock(&scomp_lock);
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
 }
 }
 
 
 int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
 int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
@@ -316,40 +327,18 @@ static const struct crypto_type crypto_scomp_type = {
 int crypto_register_scomp(struct scomp_alg *alg)
 int crypto_register_scomp(struct scomp_alg *alg)
 {
 {
 	struct crypto_alg *base = &alg->base;
 	struct crypto_alg *base = &alg->base;
-	int ret = -ENOMEM;
-
-	mutex_lock(&scomp_lock);
-	if (crypto_scomp_alloc_all_scratches())
-		goto error;
 
 
 	base->cra_type = &crypto_scomp_type;
 	base->cra_type = &crypto_scomp_type;
 	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
 	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
 	base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
 	base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
 
 
-	ret = crypto_register_alg(base);
-	if (ret)
-		goto error;
-
-	mutex_unlock(&scomp_lock);
-	return ret;
-
-error:
-	crypto_scomp_free_all_scratches();
-	mutex_unlock(&scomp_lock);
-	return ret;
+	return crypto_register_alg(base);
 }
 }
 EXPORT_SYMBOL_GPL(crypto_register_scomp);
 EXPORT_SYMBOL_GPL(crypto_register_scomp);
 
 
 int crypto_unregister_scomp(struct scomp_alg *alg)
 int crypto_unregister_scomp(struct scomp_alg *alg)
 {
 {
-	int ret;
-
-	mutex_lock(&scomp_lock);
-	ret = crypto_unregister_alg(&alg->base);
-	crypto_scomp_free_all_scratches();
-	mutex_unlock(&scomp_lock);
-
-	return ret;
+	return crypto_unregister_alg(&alg->base);
 }
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
 EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
 
 

+ 41 - 36
crypto/serpent_generic.c

@@ -229,6 +229,46 @@
 	x4 ^= x2;					\
 	x4 ^= x2;					\
 	})
 	})
 
 
+static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k)
+{
+	k += 100;
+	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
+	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
+	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
+	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
+	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
+	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
+	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
+	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
+	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
+	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
+	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
+	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
+	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
+	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
+	k -= 50;
+	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
+	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
+	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
+	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
+	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
+	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
+	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
+	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
+	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
+	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
+	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
+	k -= 50;
+	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
+	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
+	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
+	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
+	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
+	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
+	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
+	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
+}
+
 int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 		     unsigned int keylen)
 		     unsigned int keylen)
 {
 {
@@ -395,42 +435,7 @@ int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 	keyiter(k[23], r1, r0, r3, 131, 31);
 	keyiter(k[23], r1, r0, r3, 131, 31);
 
 
 	/* Apply S-boxes */
 	/* Apply S-boxes */
-
-	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
-	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
-	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
-	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
-	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
-	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
-	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
-	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
-	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
-	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
-	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
-	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
-	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
-	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
-	k -= 50;
-	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
-	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
-	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
-	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
-	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
-	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
-	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
-	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
-	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
-	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
-	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
-	k -= 50;
-	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
-	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
-	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
-	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
-	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
-	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
-	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
-	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
+	__serpent_setkey_sbox(r0, r1, r2, r3, r4, ctx->expkey);
 
 
 	return 0;
 	return 0;
 }
 }

+ 4 - 4
crypto/tcrypt.c

@@ -1404,9 +1404,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		test_cipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 		test_cipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 				speed_template_32_40_48);
 				speed_template_32_40_48);
 		test_cipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
 		test_cipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
-				speed_template_32_48_64);
+				speed_template_32_64);
 		test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
 		test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
-				speed_template_32_48_64);
+				speed_template_32_64);
 		test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 		test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
 				speed_template_16_24_32);
 		test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
 		test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
@@ -1837,9 +1837,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		test_acipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 		test_acipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 				   speed_template_32_40_48);
 				   speed_template_32_40_48);
 		test_acipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
 		test_acipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
-				   speed_template_32_48_64);
+				   speed_template_32_64);
 		test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
 		test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
-				   speed_template_32_48_64);
+				   speed_template_32_64);
 		test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 		test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 				   speed_template_16_24_32);
 				   speed_template_16_24_32);
 		test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
 		test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,

+ 16 - 4
drivers/char/hw_random/Kconfig

@@ -13,10 +13,8 @@ menuconfig HW_RANDOM
 	  that's usually called /dev/hwrng, and which exposes one
 	  that's usually called /dev/hwrng, and which exposes one
 	  of possibly several hardware random number generators.
 	  of possibly several hardware random number generators.
 
 
-	  These hardware random number generators do not feed directly
-	  into the kernel's random number generator.  That is usually
-	  handled by the "rngd" daemon.  Documentation/hw_random.txt
-	  has more information.
+	  These hardware random number generators do feed into the
+	  kernel's random number generator entropy pool.
 
 
 	  If unsure, say Y.
 	  If unsure, say Y.
 
 
@@ -255,6 +253,20 @@ config HW_RANDOM_MXC_RNGA
 
 
 	  If unsure, say Y.
 	  If unsure, say Y.
 
 
+config HW_RANDOM_IMX_RNGC
+	tristate "Freescale i.MX RNGC Random Number Generator"
+	depends on ARCH_MXC
+	default HW_RANDOM
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator Version C hardware found on some Freescale i.MX
+	  processors. Version B is also supported by this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called imx-rngc.
+
+	  If unsure, say Y.
+
 config HW_RANDOM_NOMADIK
 config HW_RANDOM_NOMADIK
 	tristate "ST-Ericsson Nomadik Random Number Generator support"
 	tristate "ST-Ericsson Nomadik Random Number Generator support"
 	depends on ARCH_NOMADIK
 	depends on ARCH_NOMADIK

+ 1 - 0
drivers/char/hw_random/Makefile

@@ -20,6 +20,7 @@ obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o
 obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
 obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
 obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
 obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
 obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
 obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
+obj-$(CONFIG_HW_RANDOM_IMX_RNGC) += imx-rngc.o
 obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
 obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
 obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o

+ 36 - 6
drivers/char/hw_random/core.c

@@ -28,7 +28,10 @@
 #define RNG_MODULE_NAME		"hw_random"
 #define RNG_MODULE_NAME		"hw_random"
 
 
 static struct hwrng *current_rng;
 static struct hwrng *current_rng;
+/* the current rng has been explicitly chosen by user via sysfs */
+static int cur_rng_set_by_user;
 static struct task_struct *hwrng_fill;
 static struct task_struct *hwrng_fill;
+/* list of registered rngs, sorted descending by quality */
 static LIST_HEAD(rng_list);
 static LIST_HEAD(rng_list);
 /* Protects rng_list and current_rng */
 /* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
 static DEFINE_MUTEX(rng_mutex);
@@ -303,6 +306,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 	list_for_each_entry(rng, &rng_list, list) {
 	list_for_each_entry(rng, &rng_list, list) {
 		if (sysfs_streq(rng->name, buf)) {
 		if (sysfs_streq(rng->name, buf)) {
 			err = 0;
 			err = 0;
+			cur_rng_set_by_user = 1;
 			if (rng != current_rng)
 			if (rng != current_rng)
 				err = set_current_rng(rng);
 				err = set_current_rng(rng);
 			break;
 			break;
@@ -351,16 +355,27 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
 	return strlen(buf);
 	return strlen(buf);
 }
 }
 
 
+static ssize_t hwrng_attr_selected_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", cur_rng_set_by_user);
+}
+
 static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
 		   hwrng_attr_current_show,
 		   hwrng_attr_current_show,
 		   hwrng_attr_current_store);
 		   hwrng_attr_current_store);
 static DEVICE_ATTR(rng_available, S_IRUGO,
 static DEVICE_ATTR(rng_available, S_IRUGO,
 		   hwrng_attr_available_show,
 		   hwrng_attr_available_show,
 		   NULL);
 		   NULL);
+static DEVICE_ATTR(rng_selected, S_IRUGO,
+		   hwrng_attr_selected_show,
+		   NULL);
 
 
 static struct attribute *rng_dev_attrs[] = {
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_available.attr,
 	&dev_attr_rng_available.attr,
+	&dev_attr_rng_selected.attr,
 	NULL
 	NULL
 };
 };
 
 
@@ -417,6 +432,7 @@ int hwrng_register(struct hwrng *rng)
 {
 {
 	int err = -EINVAL;
 	int err = -EINVAL;
 	struct hwrng *old_rng, *tmp;
 	struct hwrng *old_rng, *tmp;
+	struct list_head *rng_list_ptr;
 
 
 	if (!rng->name || (!rng->data_read && !rng->read))
 	if (!rng->name || (!rng->data_read && !rng->read))
 		goto out;
 		goto out;
@@ -432,14 +448,27 @@ int hwrng_register(struct hwrng *rng)
 	init_completion(&rng->cleanup_done);
 	init_completion(&rng->cleanup_done);
 	complete(&rng->cleanup_done);
 	complete(&rng->cleanup_done);
 
 
+	/* rng_list is sorted by decreasing quality */
+	list_for_each(rng_list_ptr, &rng_list) {
+		tmp = list_entry(rng_list_ptr, struct hwrng, list);
+		if (tmp->quality < rng->quality)
+			break;
+	}
+	list_add_tail(&rng->list, rng_list_ptr);
+
 	old_rng = current_rng;
 	old_rng = current_rng;
 	err = 0;
 	err = 0;
-	if (!old_rng) {
+	if (!old_rng ||
+	    (!cur_rng_set_by_user && rng->quality > old_rng->quality)) {
+		/*
+		 * Set new rng as current as the new rng source
+		 * provides better entropy quality and was not
+		 * chosen by userspace.
+		 */
 		err = set_current_rng(rng);
 		err = set_current_rng(rng);
 		if (err)
 		if (err)
 			goto out_unlock;
 			goto out_unlock;
 	}
 	}
-	list_add_tail(&rng->list, &rng_list);
 
 
 	if (old_rng && !rng->init) {
 	if (old_rng && !rng->init) {
 		/*
 		/*
@@ -466,12 +495,13 @@ void hwrng_unregister(struct hwrng *rng)
 	list_del(&rng->list);
 	list_del(&rng->list);
 	if (current_rng == rng) {
 	if (current_rng == rng) {
 		drop_current_rng();
 		drop_current_rng();
+		cur_rng_set_by_user = 0;
+		/* rng_list is sorted by quality, use the best (=first) one */
 		if (!list_empty(&rng_list)) {
 		if (!list_empty(&rng_list)) {
-			struct hwrng *tail;
-
-			tail = list_entry(rng_list.prev, struct hwrng, list);
+			struct hwrng *new_rng;
 
 
-			set_current_rng(tail);
+			new_rng = list_entry(rng_list.next, struct hwrng, list);
+			set_current_rng(new_rng);
 		}
 		}
 	}
 	}
 
 

+ 331 - 0
drivers/char/hw_random/imx-rngc.c

@@ -0,0 +1,331 @@
+/*
+ * RNG driver for Freescale RNGC
+ *
+ * Copyright (C) 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright (C) 2017 Martin Kaiser <martin@kaiser.cx>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/hw_random.h>
+#include <linux/completion.h>
+#include <linux/io.h>
+
+#define RNGC_COMMAND			0x0004
+#define RNGC_CONTROL			0x0008
+#define RNGC_STATUS			0x000C
+#define RNGC_ERROR			0x0010
+#define RNGC_FIFO			0x0014
+
+#define RNGC_CMD_CLR_ERR		0x00000020
+#define RNGC_CMD_CLR_INT		0x00000010
+#define RNGC_CMD_SEED			0x00000002
+#define RNGC_CMD_SELF_TEST		0x00000001
+
+#define RNGC_CTRL_MASK_ERROR		0x00000040
+#define RNGC_CTRL_MASK_DONE		0x00000020
+
+#define RNGC_STATUS_ERROR		0x00010000
+#define RNGC_STATUS_FIFO_LEVEL_MASK	0x00000f00
+#define RNGC_STATUS_FIFO_LEVEL_SHIFT	8
+#define RNGC_STATUS_SEED_DONE		0x00000020
+#define RNGC_STATUS_ST_DONE		0x00000010
+
+#define RNGC_ERROR_STATUS_STAT_ERR	0x00000008
+
+#define RNGC_TIMEOUT  3000 /* 3 sec */
+
+
+static bool self_test = true;
+module_param(self_test, bool, 0);
+
+struct imx_rngc {
+	struct device		*dev;
+	struct clk		*clk;
+	void __iomem		*base;
+	struct hwrng		rng;
+	struct completion	rng_op_done;
+	/*
+	 * err_reg is written only by the irq handler and read only
+	 * when interrupts are masked, we need no spinlock
+	 */
+	u32			err_reg;
+};
+
+
+static inline void imx_rngc_irq_mask_clear(struct imx_rngc *rngc)
+{
+	u32 ctrl, cmd;
+
+	/* mask interrupts */
+	ctrl = readl(rngc->base + RNGC_CONTROL);
+	ctrl |= RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR;
+	writel(ctrl, rngc->base + RNGC_CONTROL);
+
+	/*
+	 * CLR_INT clears the interrupt only if there's no error
+	 * CLR_ERR clear the interrupt and the error register if there
+	 * is an error
+	 */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	cmd |= RNGC_CMD_CLR_INT | RNGC_CMD_CLR_ERR;
+	writel(cmd, rngc->base + RNGC_COMMAND);
+}
+
+static inline void imx_rngc_irq_unmask(struct imx_rngc *rngc)
+{
+	u32 ctrl;
+
+	ctrl = readl(rngc->base + RNGC_CONTROL);
+	ctrl &= ~(RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR);
+	writel(ctrl, rngc->base + RNGC_CONTROL);
+}
+
+static int imx_rngc_self_test(struct imx_rngc *rngc)
+{
+	u32 cmd;
+	int ret;
+
+	imx_rngc_irq_unmask(rngc);
+
+	/* run self test */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND);
+
+	ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT);
+	if (!ret) {
+		imx_rngc_irq_mask_clear(rngc);
+		return -ETIMEDOUT;
+	}
+
+	if (rngc->err_reg != 0)
+		return -EIO;
+
+	return 0;
+}
+
+static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+	unsigned int status;
+	unsigned int level;
+	int retval = 0;
+
+	while (max >= sizeof(u32)) {
+		status = readl(rngc->base + RNGC_STATUS);
+
+		/* is there some error while reading this random number? */
+		if (status & RNGC_STATUS_ERROR)
+			break;
+
+		/* how many random numbers are in FIFO? [0-16] */
+		level = (status & RNGC_STATUS_FIFO_LEVEL_MASK) >>
+			RNGC_STATUS_FIFO_LEVEL_SHIFT;
+
+		if (level) {
+			/* retrieve a random number from FIFO */
+			*(u32 *)data = readl(rngc->base + RNGC_FIFO);
+
+			retval += sizeof(u32);
+			data += sizeof(u32);
+			max -= sizeof(u32);
+		}
+	}
+
+	return retval ? retval : -EIO;
+}
+
+static irqreturn_t imx_rngc_irq(int irq, void *priv)
+{
+	struct imx_rngc *rngc = (struct imx_rngc *)priv;
+	u32 status;
+
+	/*
+	 * clearing the interrupt will also clear the error register
+	 * read error and status before clearing
+	 */
+	status = readl(rngc->base + RNGC_STATUS);
+	rngc->err_reg = readl(rngc->base + RNGC_ERROR);
+
+	imx_rngc_irq_mask_clear(rngc);
+
+	if (status & (RNGC_STATUS_SEED_DONE | RNGC_STATUS_ST_DONE))
+		complete(&rngc->rng_op_done);
+
+	return IRQ_HANDLED;
+}
+
+static int imx_rngc_init(struct hwrng *rng)
+{
+	struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+	u32 cmd;
+	int ret;
+
+	/* clear error */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND);
+
+	/* create seed, repeat while there is some statistical error */
+	do {
+		imx_rngc_irq_unmask(rngc);
+
+		/* seed creation */
+		cmd = readl(rngc->base + RNGC_COMMAND);
+		writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND);
+
+		ret = wait_for_completion_timeout(&rngc->rng_op_done,
+				RNGC_TIMEOUT);
+
+		if (!ret) {
+			imx_rngc_irq_mask_clear(rngc);
+			return -ETIMEDOUT;
+		}
+
+	} while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR);
+
+	return rngc->err_reg ? -EIO : 0;
+}
+
+static int imx_rngc_probe(struct platform_device *pdev)
+{
+	struct imx_rngc *rngc;
+	struct resource *res;
+	int ret;
+	int irq;
+
+	rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL);
+	if (!rngc)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rngc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(rngc->base))
+		return PTR_ERR(rngc->base);
+
+	rngc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rngc->clk)) {
+		dev_err(&pdev->dev, "Can not get rng_clk\n");
+		return PTR_ERR(rngc->clk);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "Couldn't get irq %d\n", irq);
+		return irq;
+	}
+
+	ret = clk_prepare_enable(rngc->clk);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(&pdev->dev,
+			irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
+	if (ret) {
+		dev_err(rngc->dev, "Can't get interrupt working.\n");
+		goto err;
+	}
+
+	init_completion(&rngc->rng_op_done);
+
+	rngc->rng.name = pdev->name;
+	rngc->rng.init = imx_rngc_init;
+	rngc->rng.read = imx_rngc_read;
+
+	rngc->dev = &pdev->dev;
+	platform_set_drvdata(pdev, rngc);
+
+	imx_rngc_irq_mask_clear(rngc);
+
+	if (self_test) {
+		ret = imx_rngc_self_test(rngc);
+		if (ret) {
+			dev_err(rngc->dev, "FSL RNGC self test failed.\n");
+			goto err;
+		}
+	}
+
+	ret = hwrng_register(&rngc->rng);
+	if (ret) {
+		dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret);
+		goto err;
+	}
+
+	dev_info(&pdev->dev, "Freescale RNGC registered.\n");
+	return 0;
+
+err:
+	clk_disable_unprepare(rngc->clk);
+
+	return ret;
+}
+
+static int __exit imx_rngc_remove(struct platform_device *pdev)
+{
+	struct imx_rngc *rngc = platform_get_drvdata(pdev);
+
+	hwrng_unregister(&rngc->rng);
+
+	clk_disable_unprepare(rngc->clk);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int imx_rngc_suspend(struct device *dev)
+{
+	struct imx_rngc *rngc = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(rngc->clk);
+
+	return 0;
+}
+
+static int imx_rngc_resume(struct device *dev)
+{
+	struct imx_rngc *rngc = dev_get_drvdata(dev);
+
+	clk_prepare_enable(rngc->clk);
+
+	return 0;
+}
+
+static const struct dev_pm_ops imx_rngc_pm_ops = {
+	.suspend	= imx_rngc_suspend,
+	.resume		= imx_rngc_resume,
+};
+#endif
+
+static const struct of_device_id imx_rngc_dt_ids[] = {
+	{ .compatible = "fsl,imx25-rngb", .data = NULL, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_rngc_dt_ids);
+
+static struct platform_driver imx_rngc_driver = {
+	.driver = {
+		.name = "imx_rngc",
+#ifdef CONFIG_PM
+		.pm = &imx_rngc_pm_ops,
+#endif
+		.of_match_table = imx_rngc_dt_ids,
+	},
+	.remove = __exit_p(imx_rngc_remove),
+};
+
+module_platform_driver_probe(imx_rngc_driver, imx_rngc_probe);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("H/W RNGC driver for i.MX");
+MODULE_LICENSE("GPL");

+ 46 - 3
drivers/crypto/Kconfig

@@ -525,12 +525,26 @@ config CRYPTO_DEV_ATMEL_SHA
 	  To compile this driver as a module, choose M here: the module
 	  To compile this driver as a module, choose M here: the module
 	  will be called atmel-sha.
 	  will be called atmel-sha.
 
 
+config CRYPTO_DEV_ATMEL_ECC
+	tristate "Support for Microchip / Atmel ECC hw accelerator"
+	depends on ARCH_AT91 || COMPILE_TEST
+	depends on I2C
+	select CRYPTO_ECDH
+	select CRC16
+	help
+	  Microchip / Atmel ECC hw accelerator.
+	  Select this if you want to use the Microchip / Atmel module for
+	  the ECDH algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-ecc.
+
 config CRYPTO_DEV_CCP
-	bool "Support for AMD Cryptographic Coprocessor"
+	bool "Support for AMD Secure Processor"
 	depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
 	help
-	  The AMD Cryptographic Coprocessor provides hardware offload support
-	  for encryption, hashing and related operations.
+	  The AMD Secure Processor provides support for the Cryptographic Coprocessor
+	  (CCP) and the Platform Security Processor (PSP) devices.
 
 if CRYPTO_DEV_CCP
 	source "drivers/crypto/ccp/Kconfig"
@@ -616,6 +630,14 @@ config CRYPTO_DEV_SUN4I_SS
 	  To compile this driver as a module, choose M here: the module
 	  will be called sun4i-ss.
 
+config CRYPTO_DEV_SUN4I_SS_PRNG
+	bool "Support for Allwinner Security System PRNG"
+	depends on CRYPTO_DEV_SUN4I_SS
+	select CRYPTO_RNG
+	help
+	  Select this option if you want to provide kernel-side support for
+	  the Pseudo-Random Number Generator found in the Security System.
+
 config CRYPTO_DEV_ROCKCHIP
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
@@ -686,4 +708,25 @@ config CRYPTO_DEV_SAFEXCEL
 	  chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash
 	  algorithms.
 
+config CRYPTO_DEV_ARTPEC6
+	tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
+	depends on ARM && (ARCH_ARTPEC || COMPILE_TEST)
+	depends on HAS_DMA
+	depends on OF
+	select CRYPTO_AEAD
+	select CRYPTO_AES
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_CTR
+	select CRYPTO_HASH
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA384
+	select CRYPTO_SHA512
+	help
+	  Enables the driver for the on-chip crypto accelerator
+	  of Axis ARTPEC SoCs.
+
+	  To compile this driver as a module, choose M here.
+
 endif # CRYPTO_HW

+ 3 - 1
drivers/crypto/Makefile

@@ -1,6 +1,7 @@
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
 obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
 obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
@@ -35,7 +36,7 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
-obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32/
+obj-$(CONFIG_ARCH_STM32) += stm32/
 obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
@@ -43,3 +44,4 @@ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
+obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/

+ 781 - 0
drivers/crypto/atmel-ecc.c

@@ -0,0 +1,781 @@
+/*
+ * Microchip / Atmel ECC (I2C) driver.
+ *
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitrev.h>
+#include <linux/crc16.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/ecdh.h>
+#include <crypto/kpp.h>
+#include "atmel-ecc.h"
+
+/* Used for binding tfm objects to i2c clients. */
+struct atmel_ecc_driver_data {
+	struct list_head i2c_client_list;
+	spinlock_t i2c_list_lock;
+} ____cacheline_aligned;
+
+static struct atmel_ecc_driver_data driver_data;
+
+/**
+ * atmel_ecc_i2c_client_priv - i2c_client private data
+ * @client              : pointer to i2c client device
+ * @i2c_client_list_node: part of i2c_client_list
+ * @lock                : lock for sending i2c commands
+ * @wake_token          : wake token array of zeros
+ * @wake_token_sz       : size in bytes of the wake_token
+ * @tfm_count           : number of active crypto transformations on i2c client
+ *
+ * Reads and writes from/to the i2c client are sequential. The first byte
+ * transmitted to the device is treated as the byte size. Any attempt to send
+ * more than this number of bytes will cause the device to not ACK those bytes.
+ * After the host writes a single command byte to the input buffer, reads are
+ * prohibited until after the device completes command execution. Use a mutex
+ * when sending i2c commands.
+ */
+struct atmel_ecc_i2c_client_priv {
+	struct i2c_client *client;
+	struct list_head i2c_client_list_node;
+	struct mutex lock;
+	u8 wake_token[WAKE_TOKEN_MAX_SIZE];
+	size_t wake_token_sz;
+	atomic_t tfm_count ____cacheline_aligned;
+};
+
+/**
+ * atmel_ecdh_ctx - transformation context
+ * @client     : pointer to i2c client device
+ * @fallback   : used for unsupported curves or when the user wants to use
+ *               their own private key.
+ * @public_key : generated when calling set_secret(). It's the responsibility
+ *               of the user to not call set_secret() while
+ *               generate_public_key() or compute_shared_secret() are in flight.
+ * @curve_id   : elliptic curve id
+ * @n_sz       : size in bytes of the n prime
+ * @do_fallback: true when the device doesn't support the curve or when the
+ *               user wants to use their own private key.
+ */
+struct atmel_ecdh_ctx {
+	struct i2c_client *client;
+	struct crypto_kpp *fallback;
+	const u8 *public_key;
+	unsigned int curve_id;
+	size_t n_sz;
+	bool do_fallback;
+};
+
+/**
+ * atmel_ecc_work_data - data structure representing the work
+ * @ctx : transformation context.
+ * @cbk : pointer to a callback function to be invoked upon completion of this
+ *        request. This has the form:
+ *        callback(struct atmel_ecc_work_data *work_data, void *areq, int status)
+ *        where:
+ *        @work_data: data structure representing the work
+ *        @areq     : optional pointer to an argument passed with the original
+ *                    request.
+ *        @status   : status returned from the i2c client device or i2c error.
+ * @areq: optional pointer to a user argument for use at callback time.
+ * @work: describes the task to be executed.
+ * @cmd : structure used for communicating with the device.
+ */
+struct atmel_ecc_work_data {
+	struct atmel_ecdh_ctx *ctx;
+	void (*cbk)(struct atmel_ecc_work_data *work_data, void *areq,
+		    int status);
+	void *areq;
+	struct work_struct work;
+	struct atmel_ecc_cmd cmd;
+};
+
+static u16 atmel_ecc_crc16(u16 crc, const u8 *buffer, size_t len)
+{
+	return cpu_to_le16(bitrev16(crc16(crc, buffer, len)));
+}
+
+/**
+ * atmel_ecc_checksum() - Generate the 16-bit CRC required by the ATMEL ECC.
+ * The CRC-16 covers the count, opcode, param1, param2 and data bytes.
+ * The checksum is saved in little-endian format in the least significant
+ * two bytes of the command. CRC polynomial is 0x8005 and the initial register
+ * value should be zero.
+ *
+ * @cmd : structure used for communicating with the device.
+ */
+static void atmel_ecc_checksum(struct atmel_ecc_cmd *cmd)
+{
+	u8 *data = &cmd->count;
+	size_t len = cmd->count - CRC_SIZE;
+	u16 *crc16 = (u16 *)(data + len);
+
+	*crc16 = atmel_ecc_crc16(0, data, len);
+}
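As a cross-check of the checksum description above, the following stand-alone sketch (not part of the patch) reproduces the same composition: the kernel's reflected CRC-16 (polynomial 0x8005, 0xA001 in reflected form, zero initial value) followed by a 16-bit bit reversal, applied to the five header bytes of a hypothetical GenKey command. The frame contents are illustrative assumptions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Bit-reflected CRC-16, polynomial 0x8005 (0xA001 reflected), like lib/crc16.c */
static uint16_t crc16_reflected(uint16_t crc, const uint8_t *buf, size_t len)
{
	while (len--) {
		crc ^= *buf++;
		for (int i = 0; i < 8; i++)
			crc = (crc & 1) ? (crc >> 1) ^ 0xA001 : crc >> 1;
	}
	return crc;
}

static uint16_t bitrev16(uint16_t x)
{
	uint16_t r = 0;

	for (int i = 0; i < 16; i++)
		r |= ((x >> i) & 1) << (15 - i);
	return r;
}

int main(void)
{
	/* count, opcode, param1, param2 (LE) of a hypothetical GenKey command */
	const uint8_t hdr[] = { 0x07, 0x40, 0x04, 0x02, 0x00 };
	uint16_t crc = bitrev16(crc16_reflected(0, hdr, sizeof(hdr)));

	/* stored least-significant byte first, as atmel_ecc_checksum() does */
	printf("crc bytes: %02x %02x\n",
	       (unsigned int)(crc & 0xff), (unsigned int)(crc >> 8));
	return 0;
}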
+
+static void atmel_ecc_init_read_cmd(struct atmel_ecc_cmd *cmd)
+{
+	cmd->word_addr = COMMAND;
+	cmd->opcode = OPCODE_READ;
+	/*
+	 * Read the word from Configuration zone that contains the lock bytes
+	 * (UserExtra, Selector, LockValue, LockConfig).
+	 */
+	cmd->param1 = CONFIG_ZONE;
+	cmd->param2 = DEVICE_LOCK_ADDR;
+	cmd->count = READ_COUNT;
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_READ;
+	cmd->rxsize = READ_RSP_SIZE;
+}
+
+static void atmel_ecc_init_genkey_cmd(struct atmel_ecc_cmd *cmd, u16 keyid)
+{
+	cmd->word_addr = COMMAND;
+	cmd->count = GENKEY_COUNT;
+	cmd->opcode = OPCODE_GENKEY;
+	cmd->param1 = GENKEY_MODE_PRIVATE;
+	/* a random private key will be generated and stored in slot keyID */
+	cmd->param2 = cpu_to_le16(keyid);
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_GENKEY;
+	cmd->rxsize = GENKEY_RSP_SIZE;
+}
+
+static int atmel_ecc_init_ecdh_cmd(struct atmel_ecc_cmd *cmd,
+				   struct scatterlist *pubkey)
+{
+	size_t copied;
+
+	cmd->word_addr = COMMAND;
+	cmd->count = ECDH_COUNT;
+	cmd->opcode = OPCODE_ECDH;
+	cmd->param1 = ECDH_PREFIX_MODE;
+	/* private key slot */
+	cmd->param2 = cpu_to_le16(DATA_SLOT_2);
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	copied = sg_copy_to_buffer(pubkey, 1, cmd->data, ATMEL_ECC_PUBKEY_SIZE);
+	if (copied != ATMEL_ECC_PUBKEY_SIZE)
+		return -EINVAL;
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_ECDH;
+	cmd->rxsize = ECDH_RSP_SIZE;
+
+	return 0;
+}
+
+/*
+ * After wake and after execution of a command, there will be error, status, or
+ * result bytes in the device's output register that can be retrieved by the
+ * system. When the length of that group is four bytes, the codes returned are
+ * detailed in error_list.
+ */
+static int atmel_ecc_status(struct device *dev, u8 *status)
+{
+	size_t err_list_len = ARRAY_SIZE(error_list);
+	int i;
+	u8 err_id = status[1];
+
+	if (*status != STATUS_SIZE)
+		return 0;
+
+	if (err_id == STATUS_WAKE_SUCCESSFUL || err_id == STATUS_NOERR)
+		return 0;
+
+	for (i = 0; i < err_list_len; i++)
+		if (error_list[i].value == err_id)
+			break;
+
+	/* if err_id is not in the error_list then ignore it */
+	if (i != err_list_len) {
+		dev_err(dev, "%02x: %s:\n", err_id, error_list[i].error_text);
+		return err_id;
+	}
+
+	return 0;
+}
+
+static int atmel_ecc_wakeup(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+	u8 status[STATUS_RSP_SIZE];
+	int ret;
+
+	/*
+	 * The device ignores any levels or transitions on the SCL pin when the
+	 * device is idle, asleep or during waking up. Don't check for error
+	 * when waking up the device.
+	 */
+	i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz);
+
+	/*
+	 * Wait to wake the device. Typical execution times for ecdh and genkey
+	 * are tens of milliseconds; the TWHI_MIN..TWHI_MAX window leaves a
+	 * 50 microsecond tolerance.
+	 */
+	usleep_range(TWHI_MIN, TWHI_MAX);
+
+	ret = i2c_master_recv(client, status, STATUS_SIZE);
+	if (ret < 0)
+		return ret;
+
+	return atmel_ecc_status(&client->dev, status);
+}
+
+static int atmel_ecc_sleep(struct i2c_client *client)
+{
+	u8 sleep = SLEEP_TOKEN;
+
+	return i2c_master_send(client, &sleep, 1);
+}
+
+static void atmel_ecdh_done(struct atmel_ecc_work_data *work_data, void *areq,
+			    int status)
+{
+	struct kpp_request *req = areq;
+	struct atmel_ecdh_ctx *ctx = work_data->ctx;
+	struct atmel_ecc_cmd *cmd = &work_data->cmd;
+	size_t copied;
+	size_t n_sz = ctx->n_sz;
+
+	if (status)
+		goto free_work_data;
+
+	/* copy the shared secret */
+	copied = sg_copy_from_buffer(req->dst, 1, &cmd->data[RSP_DATA_IDX],
+				     n_sz);
+	if (copied != n_sz)
+		status = -EINVAL;
+
+	/* fall through */
+free_work_data:
+	kzfree(work_data);
+	kpp_request_complete(req, status);
+}
+
+/*
+ * atmel_ecc_send_receive() - send a command to the device and receive its
+ *                            response.
+ * @client: i2c client device
+ * @cmd   : structure used to communicate with the device
+ *
+ * After the device receives a Wake token, a watchdog counter starts within the
+ * device. After the watchdog timer expires, the device enters sleep mode
+ * regardless of whether some I/O transmission or command execution is in
+ * progress. If a command is attempted when insufficient time remains before the
+ * watchdog timer expires, the device will return the watchdog timeout error
+ * code without attempting to execute the command. There is no way to reset the
+ * counter other than to put the device into sleep or idle mode and then
+ * wake it up again.
+ */
+static int atmel_ecc_send_receive(struct i2c_client *client,
+				  struct atmel_ecc_cmd *cmd)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+	int ret;
+
+	mutex_lock(&i2c_priv->lock);
+
+	ret = atmel_ecc_wakeup(client);
+	if (ret)
+		goto err;
+
+	/* send the command */
+	ret = i2c_master_send(client, (u8 *)cmd, cmd->count + WORD_ADDR_SIZE);
+	if (ret < 0)
+		goto err;
+
+	/* delay the appropriate amount of time for command to execute */
+	msleep(cmd->msecs);
+
+	/* receive the response */
+	ret = i2c_master_recv(client, cmd->data, cmd->rxsize);
+	if (ret < 0)
+		goto err;
+
+	/* put the device into low-power mode */
+	ret = atmel_ecc_sleep(client);
+	if (ret < 0)
+		goto err;
+
+	mutex_unlock(&i2c_priv->lock);
+	return atmel_ecc_status(&client->dev, cmd->data);
+err:
+	mutex_unlock(&i2c_priv->lock);
+	return ret;
+}
+
+static void atmel_ecc_work_handler(struct work_struct *work)
+{
+	struct atmel_ecc_work_data *work_data =
+			container_of(work, struct atmel_ecc_work_data, work);
+	struct atmel_ecc_cmd *cmd = &work_data->cmd;
+	struct i2c_client *client = work_data->ctx->client;
+	int status;
+
+	status = atmel_ecc_send_receive(client, cmd);
+	work_data->cbk(work_data, work_data->areq, status);
+}
+
+static void atmel_ecc_enqueue(struct atmel_ecc_work_data *work_data,
+			      void (*cbk)(struct atmel_ecc_work_data *work_data,
+					  void *areq, int status),
+			      void *areq)
+{
+	work_data->cbk = (void *)cbk;
+	work_data->areq = areq;
+
+	INIT_WORK(&work_data->work, atmel_ecc_work_handler);
+	schedule_work(&work_data->work);
+}
+
+static unsigned int atmel_ecdh_supported_curve(unsigned int curve_id)
+{
+	if (curve_id == ECC_CURVE_NIST_P256)
+		return ATMEL_ECC_NIST_P256_N_SIZE;
+
+	return 0;
+}
+
+/*
+ * A random private key is generated and stored in the device. The device
+ * returns the corresponding public key.
+ */
+static int atmel_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
+				 unsigned int len)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct atmel_ecc_cmd *cmd;
+	void *public_key;
+	struct ecdh params;
+	int ret = -ENOMEM;
+
+	/* free the old public key, if any */
+	kfree(ctx->public_key);
+	/* make sure you don't free the old public key twice */
+	ctx->public_key = NULL;
+
+	if (crypto_ecdh_decode_key(buf, len, &params) < 0) {
+		dev_err(&ctx->client->dev, "crypto_ecdh_decode_key failed\n");
+		return -EINVAL;
+	}
+
+	ctx->n_sz = atmel_ecdh_supported_curve(params.curve_id);
+	if (!ctx->n_sz || params.key_size) {
+		/* fallback to ecdh software implementation */
+		ctx->do_fallback = true;
+		return crypto_kpp_set_secret(ctx->fallback, buf, len);
+	}
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	public_key = kmalloc(ATMEL_ECC_PUBKEY_SIZE, GFP_KERNEL);
+	if (!public_key)
+		goto free_cmd;
+
+	ctx->do_fallback = false;
+	ctx->curve_id = params.curve_id;
+
+	atmel_ecc_init_genkey_cmd(cmd, DATA_SLOT_2);
+
+	ret = atmel_ecc_send_receive(ctx->client, cmd);
+	if (ret)
+		goto free_public_key;
+
+	/* save the public key */
+	memcpy(public_key, &cmd->data[RSP_DATA_IDX], ATMEL_ECC_PUBKEY_SIZE);
+	ctx->public_key = public_key;
+
+	kfree(cmd);
+	return 0;
+
+free_public_key:
+	kfree(public_key);
+free_cmd:
+	kfree(cmd);
+	return ret;
+}
+
+static int atmel_ecdh_generate_public_key(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	size_t copied;
+	int ret = 0;
+
+	if (ctx->do_fallback) {
+		kpp_request_set_tfm(req, ctx->fallback);
+		return crypto_kpp_generate_public_key(req);
+	}
+
+	/* public key was saved at private key generation */
+	copied = sg_copy_from_buffer(req->dst, 1, ctx->public_key,
+				     ATMEL_ECC_PUBKEY_SIZE);
+	if (copied != ATMEL_ECC_PUBKEY_SIZE)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct atmel_ecc_work_data *work_data;
+	gfp_t gfp;
+	int ret;
+
+	if (ctx->do_fallback) {
+		kpp_request_set_tfm(req, ctx->fallback);
+		return crypto_kpp_compute_shared_secret(req);
+	}
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
+							     GFP_ATOMIC;
+
+	work_data = kmalloc(sizeof(*work_data), gfp);
+	if (!work_data)
+		return -ENOMEM;
+
+	work_data->ctx = ctx;
+
+	ret = atmel_ecc_init_ecdh_cmd(&work_data->cmd, req->src);
+	if (ret)
+		goto free_work_data;
+
+	atmel_ecc_enqueue(work_data, atmel_ecdh_done, req);
+
+	return -EINPROGRESS;
+
+free_work_data:
+	kfree(work_data);
+	return ret;
+}
+
+static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
+	struct i2c_client *client = ERR_PTR(-ENODEV);
+	int min_tfm_cnt = INT_MAX;
+	int tfm_cnt;
+
+	spin_lock(&driver_data.i2c_list_lock);
+
+	if (list_empty(&driver_data.i2c_client_list)) {
+		spin_unlock(&driver_data.i2c_list_lock);
+		return ERR_PTR(-ENODEV);
+	}
+
+	list_for_each_entry(i2c_priv, &driver_data.i2c_client_list,
+			    i2c_client_list_node) {
+		tfm_cnt = atomic_read(&i2c_priv->tfm_count);
+		if (tfm_cnt < min_tfm_cnt) {
+			min_tfm_cnt = tfm_cnt;
+			min_i2c_priv = i2c_priv;
+		}
+		if (!min_tfm_cnt)
+			break;
+	}
+
+	if (min_i2c_priv) {
+		atomic_inc(&min_i2c_priv->tfm_count);
+		client = min_i2c_priv->client;
+	}
+
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	return client;
+}
+
+static void atmel_ecc_i2c_client_free(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	atomic_dec(&i2c_priv->tfm_count);
+}
+
+static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
+{
+	const char *alg = kpp_alg_name(tfm);
+	struct crypto_kpp *fallback;
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	ctx->client = atmel_ecc_i2c_client_alloc();
+	if (IS_ERR(ctx->client)) {
+		pr_err("tfm - i2c_client binding failed\n");
+		return PTR_ERR(ctx->client);
+	}
+
+	fallback = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback)) {
+		dev_err(&ctx->client->dev, "Failed to allocate transformation for '%s': %ld\n",
+			alg, PTR_ERR(fallback));
+		return PTR_ERR(fallback);
+	}
+
+	crypto_kpp_set_flags(fallback, crypto_kpp_get_flags(tfm));
+
+	dev_info(&ctx->client->dev, "Using '%s' as fallback implementation.\n",
+		 crypto_tfm_alg_driver_name(crypto_kpp_tfm(fallback)));
+
+	ctx->fallback = fallback;
+
+	return 0;
+}
+
+static void atmel_ecdh_exit_tfm(struct crypto_kpp *tfm)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	kfree(ctx->public_key);
+	crypto_free_kpp(ctx->fallback);
+	atmel_ecc_i2c_client_free(ctx->client);
+}
+
+static unsigned int atmel_ecdh_max_size(struct crypto_kpp *tfm)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	if (ctx->fallback)
+		return crypto_kpp_maxsize(ctx->fallback);
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	return ATMEL_ECC_PUBKEY_SIZE;
+}
+
+static struct kpp_alg atmel_ecdh = {
+	.set_secret = atmel_ecdh_set_secret,
+	.generate_public_key = atmel_ecdh_generate_public_key,
+	.compute_shared_secret = atmel_ecdh_compute_shared_secret,
+	.init = atmel_ecdh_init_tfm,
+	.exit = atmel_ecdh_exit_tfm,
+	.max_size = atmel_ecdh_max_size,
+	.base = {
+		.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+		.cra_name = "ecdh",
+		.cra_driver_name = "atmel-ecdh",
+		.cra_priority = ATMEL_ECC_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = sizeof(struct atmel_ecdh_ctx),
+	},
+};
+
+static inline size_t atmel_ecc_wake_token_sz(u32 bus_clk_rate)
+{
+	u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
+
+	/* return the size of the wake_token in bytes */
+	return DIV_ROUND_UP(no_of_bits, 8);
+}
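A quick stand-alone check of the arithmetic above (assumed bus clock rates, not part of the patch): driving SDA low for TWLO (60 microseconds) costs bus_clk_rate * 60 microseconds worth of bit times, so the zero-filled wake token grows with the bus clock and reaches 8 bytes at the 1 MHz limit, matching WAKE_TOKEN_MAX_SIZE.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define TWLO_USEC		60
#define USEC_PER_SEC		1000000ULL

static size_t wake_token_sz(uint32_t bus_clk_rate)
{
	uint64_t no_of_bits = DIV_ROUND_UP((uint64_t)TWLO_USEC * bus_clk_rate,
					   USEC_PER_SEC);

	return DIV_ROUND_UP(no_of_bits, 8);
}

int main(void)
{
	const uint32_t rates[] = { 100000, 400000, 1000000 };

	for (size_t i = 0; i < sizeof(rates) / sizeof(rates[0]); i++)
		printf("%u Hz -> %zu wake token byte(s)\n",
		       (unsigned int)rates[i], wake_token_sz(rates[i]));
	/* prints 1, 3 and 8 bytes respectively */
	return 0;
}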
+
+static int device_sanity_check(struct i2c_client *client)
+{
+	struct atmel_ecc_cmd *cmd;
+	int ret;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	atmel_ecc_init_read_cmd(cmd);
+
+	ret = atmel_ecc_send_receive(client, cmd);
+	if (ret)
+		goto free_cmd;
+
+	/*
+	 * It is vital that the Configuration, Data and OTP zones be locked
+	 * prior to release into the field of the system containing the device.
+	 * Failure to lock these zones may permit modification of any secret
+	 * keys and may lead to other security problems.
+	 */
+	if (cmd->data[LOCK_CONFIG_IDX] || cmd->data[LOCK_VALUE_IDX]) {
+		dev_err(&client->dev, "Configuration or Data and OTP zones are unlocked!\n");
+		ret = -ENOTSUPP;
+	}
+
+	/* fall through */
+free_cmd:
+	kfree(cmd);
+	return ret;
+}
+
+static int atmel_ecc_probe(struct i2c_client *client,
+			   const struct i2c_device_id *id)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv;
+	struct device *dev = &client->dev;
+	int ret;
+	u32 bus_clk_rate;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(dev, "I2C_FUNC_I2C not supported\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(client->adapter->dev.of_node,
+				   "clock-frequency", &bus_clk_rate);
+	if (ret) {
+		dev_err(dev, "of: failed to read clock-frequency property\n");
+		return ret;
+	}
+
+	if (bus_clk_rate > 1000000L) {
+		dev_err(dev, "%d exceeds maximum supported clock frequency (1MHz)\n",
+			bus_clk_rate);
+		return -EINVAL;
+	}
+
+	i2c_priv = devm_kmalloc(dev, sizeof(*i2c_priv), GFP_KERNEL);
+	if (!i2c_priv)
+		return -ENOMEM;
+
+	i2c_priv->client = client;
+	mutex_init(&i2c_priv->lock);
+
+	/*
+	 * WAKE_TOKEN_MAX_SIZE was calculated for the maximum bus_clk_rate -
+	 * 1MHz. The previous bus_clk_rate check ensures us that wake_token_sz
+	 * will always be smaller than or equal to WAKE_TOKEN_MAX_SIZE.
+	 */
+	i2c_priv->wake_token_sz = atmel_ecc_wake_token_sz(bus_clk_rate);
+
+	memset(i2c_priv->wake_token, 0, sizeof(i2c_priv->wake_token));
+
+	atomic_set(&i2c_priv->tfm_count, 0);
+
+	i2c_set_clientdata(client, i2c_priv);
+
+	ret = device_sanity_check(client);
+	if (ret)
+		return ret;
+
+	spin_lock(&driver_data.i2c_list_lock);
+	list_add_tail(&i2c_priv->i2c_client_list_node,
+		      &driver_data.i2c_client_list);
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	ret = crypto_register_kpp(&atmel_ecdh);
+	if (ret) {
+		spin_lock(&driver_data.i2c_list_lock);
+		list_del(&i2c_priv->i2c_client_list_node);
+		spin_unlock(&driver_data.i2c_list_lock);
+
+		dev_err(dev, "%s alg registration failed\n",
+			atmel_ecdh.base.cra_driver_name);
+	} else {
+		dev_info(dev, "atmel ecc algorithms registered in /proc/crypto\n");
+	}
+
+	return ret;
+}
+
+static int atmel_ecc_remove(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	/* Return -EBUSY if the i2c client is still in use by any tfm. */
+	if (atomic_read(&i2c_priv->tfm_count)) {
+		dev_err(&client->dev, "Device is busy\n");
+		return -EBUSY;
+	}
+
+	crypto_unregister_kpp(&atmel_ecdh);
+
+	spin_lock(&driver_data.i2c_list_lock);
+	list_del(&i2c_priv->i2c_client_list_node);
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id atmel_ecc_dt_ids[] = {
+	{
+		.compatible = "atmel,atecc508a",
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_ecc_dt_ids);
+#endif
+
+static const struct i2c_device_id atmel_ecc_id[] = {
+	{ "atecc508a", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, atmel_ecc_id);
+
+static struct i2c_driver atmel_ecc_driver = {
+	.driver = {
+		.name	= "atmel-ecc",
+		.of_match_table = of_match_ptr(atmel_ecc_dt_ids),
+	},
+	.probe		= atmel_ecc_probe,
+	.remove		= atmel_ecc_remove,
+	.id_table	= atmel_ecc_id,
+};
+
+static int __init atmel_ecc_init(void)
+{
+	spin_lock_init(&driver_data.i2c_list_lock);
+	INIT_LIST_HEAD(&driver_data.i2c_client_list);
+	return i2c_add_driver(&atmel_ecc_driver);
+}
+
+static void __exit atmel_ecc_exit(void)
+{
+	flush_scheduled_work();
+	i2c_del_driver(&atmel_ecc_driver);
+}
+
+module_init(atmel_ecc_init);
+module_exit(atmel_ecc_exit);
+
+MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
+MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
+MODULE_LICENSE("GPL v2");

+ 128 - 0
drivers/crypto/atmel-ecc.h

@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __ATMEL_ECC_H__
+#define __ATMEL_ECC_H__
+
+#define ATMEL_ECC_PRIORITY		300
+
+#define COMMAND				0x03 /* packet function */
+#define SLEEP_TOKEN			0x01
+#define WAKE_TOKEN_MAX_SIZE		8
+
+/* Definitions of Data and Command sizes */
+#define WORD_ADDR_SIZE			1
+#define COUNT_SIZE			1
+#define CRC_SIZE			2
+#define CMD_OVERHEAD_SIZE		(COUNT_SIZE + CRC_SIZE)
+
+/* size in bytes of the n prime */
+#define ATMEL_ECC_NIST_P256_N_SIZE	32
+#define ATMEL_ECC_PUBKEY_SIZE		(2 * ATMEL_ECC_NIST_P256_N_SIZE)
+
+#define STATUS_RSP_SIZE			4
+#define ECDH_RSP_SIZE			(32 + CMD_OVERHEAD_SIZE)
+#define GENKEY_RSP_SIZE			(ATMEL_ECC_PUBKEY_SIZE + \
+					 CMD_OVERHEAD_SIZE)
+#define READ_RSP_SIZE			(4 + CMD_OVERHEAD_SIZE)
+#define MAX_RSP_SIZE			GENKEY_RSP_SIZE
+
+/**
+ * atmel_ecc_cmd - structure used for communicating with the device.
+ * @word_addr: indicates the function of the packet sent to the device. This
+ *             byte should have a value of COMMAND for normal operation.
+ * @count    : number of bytes to be transferred to (or from) the device.
+ * @opcode   : the command code.
+ * @param1   : the first parameter; always present.
+ * @param2   : the second parameter; always present.
+ * @data     : optional remaining input data. Includes a 2-byte CRC.
+ * @rxsize   : size of the data received from i2c client.
+ * @msecs    : command execution time in milliseconds
+ */
+struct atmel_ecc_cmd {
+	u8 word_addr;
+	u8 count;
+	u8 opcode;
+	u8 param1;
+	u16 param2;
+	u8 data[MAX_RSP_SIZE];
+	u8 msecs;
+	u16 rxsize;
+} __packed;
+
+/* Status/Error codes */
+#define STATUS_SIZE			0x04
+#define STATUS_NOERR			0x00
+#define STATUS_WAKE_SUCCESSFUL		0x11
+
+static const struct {
+	u8 value;
+	const char *error_text;
+} error_list[] = {
+	{ 0x01, "CheckMac or Verify miscompare" },
+	{ 0x03, "Parse Error" },
+	{ 0x05, "ECC Fault" },
+	{ 0x0F, "Execution Error" },
+	{ 0xEE, "Watchdog about to expire" },
+	{ 0xFF, "CRC or other communication error" },
+};
+
+/* Definitions for eeprom organization */
+#define CONFIG_ZONE			0
+
+/* Definitions for Indexes common to all commands */
+#define RSP_DATA_IDX			1 /* buffer index of data in response */
+#define DATA_SLOT_2			2 /* used for ECDH private key */
+
+/* Definitions for the device lock state */
+#define DEVICE_LOCK_ADDR		0x15
+#define LOCK_VALUE_IDX			(RSP_DATA_IDX + 2)
+#define LOCK_CONFIG_IDX			(RSP_DATA_IDX + 3)
+
+/*
+ * Wake High delay to data communication (microseconds). SDA should be stable
+ * high for this entire duration.
+ */
+#define TWHI_MIN			1500
+#define TWHI_MAX			1550
+
+/* Wake Low duration */
+#define TWLO_USEC			60
+
+/* Command execution time (milliseconds) */
+#define MAX_EXEC_TIME_ECDH		58
+#define MAX_EXEC_TIME_GENKEY		115
+#define MAX_EXEC_TIME_READ		1
+
+/* Command opcode */
+#define OPCODE_ECDH			0x43
+#define OPCODE_GENKEY			0x40
+#define OPCODE_READ			0x02
+
+/* Definitions for the READ Command */
+#define READ_COUNT			7
+
+/* Definitions for the GenKey Command */
+#define GENKEY_COUNT			7
+#define GENKEY_MODE_PRIVATE		0x04
+
+/* Definitions for the ECDH Command */
+#define ECDH_COUNT			71
+#define ECDH_PREFIX_MODE		0x00
+
+#endif /* __ATMEL_ECC_H__ */
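The fixed count values above can be cross-checked against the packet layout documented for struct atmel_ecc_cmd: the count byte, opcode, param1, the two param2 bytes and the trailing CRC add up to 7 bytes (READ_COUNT and GENKEY_COUNT), and ECDH adds the 64-byte public key payload (ECDH_COUNT). A tiny stand-alone check, not part of the header:

#include <assert.h>

/* mirror the relevant macros from atmel-ecc.h */
#define COUNT_SIZE		1
#define CRC_SIZE		2
#define CMD_OVERHEAD_SIZE	(COUNT_SIZE + CRC_SIZE)
#define ATMEL_ECC_PUBKEY_SIZE	64

int main(void)
{
	/* opcode + param1 + param2 */
	const int cmd_hdr = 1 + 1 + 2;

	assert(CMD_OVERHEAD_SIZE + cmd_hdr == 7);	/* READ_COUNT, GENKEY_COUNT */
	assert(CMD_OVERHEAD_SIZE + cmd_hdr + ATMEL_ECC_PUBKEY_SIZE == 71); /* ECDH_COUNT */
	return 0;
}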

+ 1 - 1
drivers/crypto/atmel-sha.c

@@ -2883,7 +2883,7 @@ sha_dd_err:
 
 static int atmel_sha_remove(struct platform_device *pdev)
 {
-	static struct atmel_sha_dev *sha_dd;
+	struct atmel_sha_dev *sha_dd;
 
 	sha_dd = platform_get_drvdata(pdev);
 	if (!sha_dd)

+ 1 - 1
drivers/crypto/atmel-tdes.c

@@ -1487,7 +1487,7 @@ tdes_dd_err:
 
 static int atmel_tdes_remove(struct platform_device *pdev)
 {
-	static struct atmel_tdes_dev *tdes_dd;
+	struct atmel_tdes_dev *tdes_dd;
 
 	tdes_dd = platform_get_drvdata(pdev);
 	if (!tdes_dd)

+ 1 - 0
drivers/crypto/axis/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) := artpec6_crypto.o

+ 3192 - 0
drivers/crypto/axis/artpec6_crypto.c

@@ -0,0 +1,3192 @@
+/*
+ *   Driver for ARTPEC-6 crypto block using the kernel asynchronous crypto api.
+ *
+ *    Copyright (C) 2014-2017  Axis Communications AB
+ */
+#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
+
+#include <linux/bitfield.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/fault-inject.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include <crypto/aes.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/xts.h>
+
+/* Max length of a line in all cache levels for Artpec SoCs. */
+#define ARTPEC_CACHE_LINE_MAX	32
+
+#define PDMA_OUT_CFG		0x0000
+#define PDMA_OUT_BUF_CFG	0x0004
+#define PDMA_OUT_CMD		0x0008
+#define PDMA_OUT_DESCRQ_PUSH	0x0010
+#define PDMA_OUT_DESCRQ_STAT	0x0014
+
+#define A6_PDMA_IN_CFG		0x0028
+#define A6_PDMA_IN_BUF_CFG	0x002c
+#define A6_PDMA_IN_CMD		0x0030
+#define A6_PDMA_IN_STATQ_PUSH	0x0038
+#define A6_PDMA_IN_DESCRQ_PUSH	0x0044
+#define A6_PDMA_IN_DESCRQ_STAT	0x0048
+#define A6_PDMA_INTR_MASK	0x0068
+#define A6_PDMA_ACK_INTR	0x006c
+#define A6_PDMA_MASKED_INTR	0x0074
+
+#define A7_PDMA_IN_CFG		0x002c
+#define A7_PDMA_IN_BUF_CFG	0x0030
+#define A7_PDMA_IN_CMD		0x0034
+#define A7_PDMA_IN_STATQ_PUSH	0x003c
+#define A7_PDMA_IN_DESCRQ_PUSH	0x0048
+#define A7_PDMA_IN_DESCRQ_STAT	0x004C
+#define A7_PDMA_INTR_MASK	0x006c
+#define A7_PDMA_ACK_INTR	0x0070
+#define A7_PDMA_MASKED_INTR	0x0078
+
+#define PDMA_OUT_CFG_EN				BIT(0)
+
+#define PDMA_OUT_BUF_CFG_DATA_BUF_SIZE		GENMASK(4, 0)
+#define PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE		GENMASK(9, 5)
+
+#define PDMA_OUT_CMD_START			BIT(0)
+#define A6_PDMA_OUT_CMD_STOP			BIT(3)
+#define A7_PDMA_OUT_CMD_STOP			BIT(2)
+
+#define PDMA_OUT_DESCRQ_PUSH_LEN		GENMASK(5, 0)
+#define PDMA_OUT_DESCRQ_PUSH_ADDR		GENMASK(31, 6)
+
+#define PDMA_OUT_DESCRQ_STAT_LEVEL		GENMASK(3, 0)
+#define PDMA_OUT_DESCRQ_STAT_SIZE		GENMASK(7, 4)
+
+#define PDMA_IN_CFG_EN				BIT(0)
+
+#define PDMA_IN_BUF_CFG_DATA_BUF_SIZE		GENMASK(4, 0)
+#define PDMA_IN_BUF_CFG_DESCR_BUF_SIZE		GENMASK(9, 5)
+#define PDMA_IN_BUF_CFG_STAT_BUF_SIZE		GENMASK(14, 10)
+
+#define PDMA_IN_CMD_START			BIT(0)
+#define A6_PDMA_IN_CMD_FLUSH_STAT		BIT(2)
+#define A6_PDMA_IN_CMD_STOP			BIT(3)
+#define A7_PDMA_IN_CMD_FLUSH_STAT		BIT(1)
+#define A7_PDMA_IN_CMD_STOP			BIT(2)
+
+#define PDMA_IN_STATQ_PUSH_LEN			GENMASK(5, 0)
+#define PDMA_IN_STATQ_PUSH_ADDR			GENMASK(31, 6)
+
+#define PDMA_IN_DESCRQ_PUSH_LEN			GENMASK(5, 0)
+#define PDMA_IN_DESCRQ_PUSH_ADDR		GENMASK(31, 6)
+
+#define PDMA_IN_DESCRQ_STAT_LEVEL		GENMASK(3, 0)
+#define PDMA_IN_DESCRQ_STAT_SIZE		GENMASK(7, 4)
+
+#define A6_PDMA_INTR_MASK_IN_DATA		BIT(2)
+#define A6_PDMA_INTR_MASK_IN_EOP		BIT(3)
+#define A6_PDMA_INTR_MASK_IN_EOP_FLUSH		BIT(4)
+
+#define A7_PDMA_INTR_MASK_IN_DATA		BIT(3)
+#define A7_PDMA_INTR_MASK_IN_EOP		BIT(4)
+#define A7_PDMA_INTR_MASK_IN_EOP_FLUSH		BIT(5)
+
+#define A6_CRY_MD_OPER		GENMASK(19, 16)
+
+#define A6_CRY_MD_HASH_SEL_CTX	GENMASK(21, 20)
+#define A6_CRY_MD_HASH_HMAC_FIN	BIT(23)
+
+#define A6_CRY_MD_CIPHER_LEN	GENMASK(21, 20)
+#define A6_CRY_MD_CIPHER_DECR	BIT(22)
+#define A6_CRY_MD_CIPHER_TWEAK	BIT(23)
+#define A6_CRY_MD_CIPHER_DSEQ	BIT(24)
+
+#define A7_CRY_MD_OPER		GENMASK(11, 8)
+
+#define A7_CRY_MD_HASH_SEL_CTX	GENMASK(13, 12)
+#define A7_CRY_MD_HASH_HMAC_FIN	BIT(15)
+
+#define A7_CRY_MD_CIPHER_LEN	GENMASK(13, 12)
+#define A7_CRY_MD_CIPHER_DECR	BIT(14)
+#define A7_CRY_MD_CIPHER_TWEAK	BIT(15)
+#define A7_CRY_MD_CIPHER_DSEQ	BIT(16)
+
+/* DMA metadata constants */
+#define regk_crypto_aes_cbc     0x00000002
+#define regk_crypto_aes_ctr     0x00000003
+#define regk_crypto_aes_ecb     0x00000001
+#define regk_crypto_aes_gcm     0x00000004
+#define regk_crypto_aes_xts     0x00000005
+#define regk_crypto_cache       0x00000002
+#define a6_regk_crypto_dlkey    0x0000000a
+#define a7_regk_crypto_dlkey    0x0000000e
+#define regk_crypto_ext         0x00000001
+#define regk_crypto_hmac_sha1   0x00000007
+#define regk_crypto_hmac_sha256 0x00000009
+#define regk_crypto_hmac_sha384 0x0000000b
+#define regk_crypto_hmac_sha512 0x0000000d
+#define regk_crypto_init        0x00000000
+#define regk_crypto_key_128     0x00000000
+#define regk_crypto_key_192     0x00000001
+#define regk_crypto_key_256     0x00000002
+#define regk_crypto_null        0x00000000
+#define regk_crypto_sha1        0x00000006
+#define regk_crypto_sha256      0x00000008
+#define regk_crypto_sha384      0x0000000a
+#define regk_crypto_sha512      0x0000000c
+
+/* DMA descriptor structures */
+struct pdma_descr_ctrl  {
+	unsigned char short_descr : 1;
+	unsigned char pad1        : 1;
+	unsigned char eop         : 1;
+	unsigned char intr        : 1;
+	unsigned char short_len   : 3;
+	unsigned char pad2        : 1;
+} __packed;
+
+struct pdma_data_descr {
+	unsigned int len : 24;
+	unsigned int buf : 32;
+} __packed;
+
+struct pdma_short_descr {
+	unsigned char data[7];
+} __packed;
+
+struct pdma_descr {
+	struct pdma_descr_ctrl ctrl;
+	union {
+		struct pdma_data_descr   data;
+		struct pdma_short_descr  shrt;
+	};
+};
+
+struct pdma_stat_descr {
+	unsigned char pad1        : 1;
+	unsigned char pad2        : 1;
+	unsigned char eop         : 1;
+	unsigned char pad3        : 5;
+	unsigned int  len         : 24;
+};
+
+/* Each descriptor array can hold max 64 entries */
+#define PDMA_DESCR_COUNT	64
+
+#define MODULE_NAME   "Artpec-6 CA"
+
+/* Hash modes (including HMAC variants) */
+#define ARTPEC6_CRYPTO_HASH_SHA1	1
+#define ARTPEC6_CRYPTO_HASH_SHA256	2
+#define ARTPEC6_CRYPTO_HASH_SHA384	3
+#define ARTPEC6_CRYPTO_HASH_SHA512	4
+
+/* Crypto modes */
+#define ARTPEC6_CRYPTO_CIPHER_AES_ECB	1
+#define ARTPEC6_CRYPTO_CIPHER_AES_CBC	2
+#define ARTPEC6_CRYPTO_CIPHER_AES_CTR	3
+#define ARTPEC6_CRYPTO_CIPHER_AES_XTS	5
+
+/* The PDMA is a DMA-engine tightly coupled with a ciphering engine.
+ * It operates on a descriptor array with up to 64 descriptor entries.
+ * The arrays must be 64 byte aligned in memory.
+ *
+ * The ciphering unit has no registers and is completely controlled by
+ * a 4-byte metadata word that is inserted at the beginning of each dma packet.
+ *
+ * A dma packet is a sequence of descriptors terminated by setting the .eop
+ * field in the final descriptor of the packet.
+ *
+ * Multiple packets are used for providing context data, key data and
+ * the plain/ciphertext.
+ *
+ *   PDMA Descriptors (Array)
+ *  +------+------+------+~~+-------+------+----
+ *  |  0   |  1   |  2   |~~| 11 EOP|  12  |  ....
+ *  +--+---+--+---+----+-+~~+-------+----+-+----
+ *     |      |        |       |         |
+ *     |      |        |       |         |
+ *   __|__  +-------++-------++-------+ +----+
+ *  | MD  | |Payload||Payload||Payload| | MD |
+ *  +-----+ +-------++-------++-------+ +----+
+ */
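To make the packet layout above concrete, here is a reduced stand-alone sketch (not part of the driver) that fills a plain descriptor array with a 4-byte metadata entry carried in a short descriptor followed by two payload descriptors, then terminates the packet by setting eop on the last one. The trimmed structure and the payload addresses are illustrative assumptions.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct descr {
	int is_short;
	int eop;
	uint32_t addr;		/* payload bus address (placeholder) */
	uint32_t len;
	uint8_t shrt[7];	/* inline data for short descriptors */
};

int main(void)
{
	struct descr ring[64];
	size_t cnt = 0;
	uint32_t metadata = 0x00000002;	/* e.g. regk_crypto_aes_cbc */

	memset(ring, 0, sizeof(ring));

	/* the 4-byte metadata rides in a short descriptor at the head of the packet */
	ring[cnt].is_short = 1;
	ring[cnt].len = sizeof(metadata);
	memcpy(ring[cnt].shrt, &metadata, sizeof(metadata));
	cnt++;

	/* two payload buffers; addresses are placeholders */
	ring[cnt].addr = 0x10000000; ring[cnt].len = 4096; cnt++;
	ring[cnt].addr = 0x10001000; ring[cnt].len = 512;  cnt++;

	ring[cnt - 1].eop = 1;	/* terminate the packet */

	printf("packet uses %zu descriptors, eop set on descriptor %zu\n",
	       cnt, cnt - 1);
	return 0;
}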
+
+struct artpec6_crypto_bounce_buffer {
+	struct list_head list;
+	size_t length;
+	struct scatterlist *sg;
+	size_t offset;
+	/* buf is aligned to ARTPEC_CACHE_LINE_MAX and
+	 * holds up to ARTPEC_CACHE_LINE_MAX bytes of data.
+	 */
+	void *buf;
+};
+
+struct artpec6_crypto_dma_map {
+	dma_addr_t dma_addr;
+	size_t size;
+	enum dma_data_direction dir;
+};
+
+struct artpec6_crypto_dma_descriptors {
+	struct pdma_descr out[PDMA_DESCR_COUNT] __aligned(64);
+	struct pdma_descr in[PDMA_DESCR_COUNT] __aligned(64);
+	u32 stat[PDMA_DESCR_COUNT] __aligned(64);
+	struct list_head bounce_buffers;
+	/* Enough maps for all out/in buffers, and all three descr. arrays */
+	struct artpec6_crypto_dma_map maps[PDMA_DESCR_COUNT * 2 + 2];
+	dma_addr_t out_dma_addr;
+	dma_addr_t in_dma_addr;
+	dma_addr_t stat_dma_addr;
+	size_t out_cnt;
+	size_t in_cnt;
+	size_t map_count;
+};
+
+enum artpec6_crypto_variant {
+	ARTPEC6_CRYPTO,
+	ARTPEC7_CRYPTO,
+};
+
+struct artpec6_crypto {
+	void __iomem *base;
+	spinlock_t queue_lock;
+	struct list_head queue; /* waiting for pdma fifo space */
+	struct list_head pending; /* submitted to pdma fifo */
+	struct tasklet_struct task;
+	struct kmem_cache *dma_cache;
+	int pending_count;
+	struct timer_list timer;
+	enum artpec6_crypto_variant variant;
+	void *pad_buffer; /* cache-aligned block padding buffer */
+	void *zero_buffer;
+};
+
+enum artpec6_crypto_hash_flags {
+	HASH_FLAG_INIT_CTX = 2,
+	HASH_FLAG_UPDATE = 4,
+	HASH_FLAG_FINALIZE = 8,
+	HASH_FLAG_HMAC = 16,
+	HASH_FLAG_UPDATE_KEY = 32,
+};
+
+struct artpec6_crypto_req_common {
+	struct list_head list;
+	struct artpec6_crypto_dma_descriptors *dma;
+	struct crypto_async_request *req;
+	void (*complete)(struct crypto_async_request *req);
+	gfp_t gfp_flags;
+};
+
+struct artpec6_hash_request_context {
+	char partial_buffer[SHA512_BLOCK_SIZE];
+	char partial_buffer_out[SHA512_BLOCK_SIZE];
+	char key_buffer[SHA512_BLOCK_SIZE];
+	char pad_buffer[SHA512_BLOCK_SIZE + 32];
+	unsigned char digeststate[SHA512_DIGEST_SIZE];
+	size_t partial_bytes;
+	u64 digcnt;
+	u32 key_md;
+	u32 hash_md;
+	enum artpec6_crypto_hash_flags hash_flags;
+	struct artpec6_crypto_req_common common;
+};
+
+struct artpec6_hash_export_state {
+	char partial_buffer[SHA512_BLOCK_SIZE];
+	unsigned char digeststate[SHA512_DIGEST_SIZE];
+	size_t partial_bytes;
+	u64 digcnt;
+	int oper;
+	unsigned int hash_flags;
+};
+
+struct artpec6_hashalg_context {
+	char hmac_key[SHA512_BLOCK_SIZE];
+	size_t hmac_key_length;
+	struct crypto_shash *child_hash;
+};
+
+struct artpec6_crypto_request_context {
+	u32 cipher_md;
+	bool decrypt;
+	struct artpec6_crypto_req_common common;
+};
+
+struct artpec6_cryptotfm_context {
+	unsigned char aes_key[2*AES_MAX_KEY_SIZE];
+	size_t key_length;
+	u32 key_md;
+	int crypto_type;
+	struct crypto_skcipher *fallback;
+};
+
+struct artpec6_crypto_aead_hw_ctx {
+	__be64	aad_length_bits;
+	__be64  text_length_bits;
+	__u8	J0[AES_BLOCK_SIZE];
+};
+
+struct artpec6_crypto_aead_req_ctx {
+	struct artpec6_crypto_aead_hw_ctx hw_ctx;
+	u32 cipher_md;
+	bool decrypt;
+	struct artpec6_crypto_req_common common;
+	__u8 decryption_tag[AES_BLOCK_SIZE] ____cacheline_aligned;
+};
+
+/* The crypto framework makes it hard to avoid this global. */
+static struct device *artpec6_crypto_dev;
+
+static struct dentry *dbgfs_root;
+
+#ifdef CONFIG_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(artpec6_crypto_fail_status_read);
+static DECLARE_FAULT_ATTR(artpec6_crypto_fail_dma_array_full);
+#endif
+
+enum {
+	ARTPEC6_CRYPTO_PREPARE_HASH_NO_START,
+	ARTPEC6_CRYPTO_PREPARE_HASH_START,
+};
+
+static int artpec6_crypto_prepare_aead(struct aead_request *areq);
+static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq);
+static int artpec6_crypto_prepare_hash(struct ahash_request *areq);
+
+static void
+artpec6_crypto_complete_crypto(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_aead(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_hash(struct crypto_async_request *req);
+
+static int
+artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common);
+
+static void
+artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common);
+
+struct artpec6_crypto_walk {
+	struct scatterlist *sg;
+	size_t offset;
+};
+
+static void artpec6_crypto_walk_init(struct artpec6_crypto_walk *awalk,
+				     struct scatterlist *sg)
+{
+	awalk->sg = sg;
+	awalk->offset = 0;
+}
+
+static size_t artpec6_crypto_walk_advance(struct artpec6_crypto_walk *awalk,
+					  size_t nbytes)
+{
+	while (nbytes && awalk->sg) {
+		size_t piece;
+
+		WARN_ON(awalk->offset > awalk->sg->length);
+
+		piece = min(nbytes, (size_t)awalk->sg->length - awalk->offset);
+		nbytes -= piece;
+		awalk->offset += piece;
+		if (awalk->offset == awalk->sg->length) {
+			awalk->sg = sg_next(awalk->sg);
+			awalk->offset = 0;
+		}
+
+	}
+
+	return nbytes;
+}
+
+static size_t
+artpec6_crypto_walk_chunklen(const struct artpec6_crypto_walk *awalk)
+{
+	WARN_ON(awalk->sg->length == awalk->offset);
+
+	return awalk->sg->length - awalk->offset;
+}
+
+static dma_addr_t
+artpec6_crypto_walk_chunk_phys(const struct artpec6_crypto_walk *awalk)
+{
+	return sg_phys(awalk->sg) + awalk->offset;
+}
+
+static void
+artpec6_crypto_copy_bounce_buffers(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct artpec6_crypto_bounce_buffer *b;
+	struct artpec6_crypto_bounce_buffer *next;
+
+	list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) {
+		pr_debug("bounce entry %p: %zu bytes @ %zu from %p\n",
+			 b, b->length, b->offset, b->buf);
+		sg_pcopy_from_buffer(b->sg,
+				   1,
+				   b->buf,
+				   b->length,
+				   b->offset);
+
+		list_del(&b->list);
+		kfree(b);
+	}
+}
+
+static inline bool artpec6_crypto_busy(void)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	int fifo_count = ac->pending_count;
+
+	return fifo_count > 6;
+}
+
+static int artpec6_crypto_submit(struct artpec6_crypto_req_common *req)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	int ret = -EBUSY;
+
+	spin_lock_bh(&ac->queue_lock);
+
+	if (!artpec6_crypto_busy()) {
+		list_add_tail(&req->list, &ac->pending);
+		artpec6_crypto_start_dma(req);
+		ret = -EINPROGRESS;
+	} else if (req->req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) {
+		list_add_tail(&req->list, &ac->queue);
+	} else {
+		artpec6_crypto_common_destroy(req);
+	}
+
+	spin_unlock_bh(&ac->queue_lock);
+
+	return ret;
+}
+
+static void artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	u32 ind, statd, outd;
+
+	/* Make descriptor content visible to the DMA before starting it. */
+	wmb();
+
+	ind = FIELD_PREP(PDMA_IN_DESCRQ_PUSH_LEN, dma->in_cnt - 1) |
+	      FIELD_PREP(PDMA_IN_DESCRQ_PUSH_ADDR, dma->in_dma_addr >> 6);
+
+	statd = FIELD_PREP(PDMA_IN_STATQ_PUSH_LEN, dma->in_cnt - 1) |
+		FIELD_PREP(PDMA_IN_STATQ_PUSH_ADDR, dma->stat_dma_addr >> 6);
+
+	outd = FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_LEN, dma->out_cnt - 1) |
+	       FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_ADDR, dma->out_dma_addr >> 6);
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(ind, base + A6_PDMA_IN_DESCRQ_PUSH);
+		writel_relaxed(statd, base + A6_PDMA_IN_STATQ_PUSH);
+		writel_relaxed(PDMA_IN_CMD_START, base + A6_PDMA_IN_CMD);
+	} else {
+		writel_relaxed(ind, base + A7_PDMA_IN_DESCRQ_PUSH);
+		writel_relaxed(statd, base + A7_PDMA_IN_STATQ_PUSH);
+		writel_relaxed(PDMA_IN_CMD_START, base + A7_PDMA_IN_CMD);
+	}
+
+	writel_relaxed(outd, base + PDMA_OUT_DESCRQ_PUSH);
+	writel_relaxed(PDMA_OUT_CMD_START, base + PDMA_OUT_CMD);
+
+	ac->pending_count++;
+}
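The FIELD_PREP()/GENMASK() packing used above can be mirrored with plain shifts; a stand-alone sketch (not part of the driver), assuming an in-queue of three descriptors at a 64-byte-aligned placeholder DMA address:

#include <stdint.h>
#include <stdio.h>

/* userspace stand-ins for GENMASK()/FIELD_PREP() from <linux/bits.h>/<linux/bitfield.h> */
#define GENMASK32(h, l)		(((~0u) >> (31 - (h))) & ((~0u) << (l)))
#define FIELD_PREP32(mask, val)	(((val) << __builtin_ctz(mask)) & (mask))

#define PDMA_IN_DESCRQ_PUSH_LEN		GENMASK32(5, 0)
#define PDMA_IN_DESCRQ_PUSH_ADDR	GENMASK32(31, 6)

int main(void)
{
	uint32_t in_cnt = 3;			/* three in-descriptors queued */
	uint32_t in_dma_addr = 0x20000040;	/* 64-byte aligned, placeholder */

	uint32_t ind = FIELD_PREP32(PDMA_IN_DESCRQ_PUSH_LEN, in_cnt - 1) |
		       FIELD_PREP32(PDMA_IN_DESCRQ_PUSH_ADDR, in_dma_addr >> 6);

	/* the low 6 bits carry count - 1, the upper bits carry addr >> 6 */
	printf("DESCRQ_PUSH word: 0x%08x\n", (unsigned int)ind);
	return 0;
}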
+
+static void
+artpec6_crypto_init_dma_operation(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+
+	dma->out_cnt = 0;
+	dma->in_cnt = 0;
+	dma->map_count = 0;
+	INIT_LIST_HEAD(&dma->bounce_buffers);
+}
+
+static bool fault_inject_dma_descr(void)
+{
+#ifdef CONFIG_FAULT_INJECTION
+	return should_fail(&artpec6_crypto_fail_dma_array_full, 1);
+#else
+	return false;
+#endif
+}
+
+/** artpec6_crypto_setup_out_descr_phys - Setup an out channel with a
+ *                                        physical address
+ *
+ * @addr: The physical address of the data buffer
+ * @len:  The length of the data buffer
+ * @eop:  True if this is the last buffer in the packet
+ *
+ * @return 0 on success or -ENOSPC if there are no more descriptors available
+ */
+static int
+artpec6_crypto_setup_out_descr_phys(struct artpec6_crypto_req_common *common,
+				    dma_addr_t addr, size_t len, bool eop)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->out_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free OUT DMA descriptors available!\n");
+		return -ENOSPC;
+	}
+
+	d = &dma->out[dma->out_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.short_descr = 0;
+	d->ctrl.eop = eop;
+	d->data.len = len;
+	d->data.buf = addr;
+	return 0;
+}
+
+/** artpec6_crypto_setup_out_descr_short - Setup a short out descriptor
+ *
+ * @dst: The virtual address of the data
+ * @len: The length of the data, must be between 1 and 7 bytes
+ * @eop: True if this is the last buffer in the packet
+ *
+ * @return 0 on success
+ *	-ENOSPC if no more descriptors are available
+ *	-EINVAL if the data length exceeds 7 bytes
+ */
+static int
+artpec6_crypto_setup_out_descr_short(struct artpec6_crypto_req_common *common,
+				     void *dst, unsigned int len, bool eop)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->out_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free OUT DMA descriptors available!\n");
+		return -ENOSPC;
+	} else if (len > 7 || len < 1) {
+		return -EINVAL;
+	}
+	d = &dma->out[dma->out_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.short_descr = 1;
+	d->ctrl.short_len = len;
+	d->ctrl.eop = eop;
+	memcpy(d->shrt.data, dst, len);
+	return 0;
+}
+
+static int artpec6_crypto_dma_map_page(struct artpec6_crypto_req_common *common,
+				      struct page *page, size_t offset,
+				      size_t size,
+				      enum dma_data_direction dir,
+				      dma_addr_t *dma_addr_out)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct device *dev = artpec6_crypto_dev;
+	struct artpec6_crypto_dma_map *map;
+	dma_addr_t dma_addr;
+
+	*dma_addr_out = 0;
+
+	if (dma->map_count >= ARRAY_SIZE(dma->maps))
+		return -ENOMEM;
+
+	dma_addr = dma_map_page(dev, page, offset, size, dir);
+	if (dma_mapping_error(dev, dma_addr))
+		return -ENOMEM;
+
+	map = &dma->maps[dma->map_count++];
+	map->size = size;
+	map->dma_addr = dma_addr;
+	map->dir = dir;
+
+	*dma_addr_out = dma_addr;
+
+	return 0;
+}
+
+static int
+artpec6_crypto_dma_map_single(struct artpec6_crypto_req_common *common,
+			      void *ptr, size_t size,
+			      enum dma_data_direction dir,
+			      dma_addr_t *dma_addr_out)
+{
+	struct page *page = virt_to_page(ptr);
+	size_t offset = (uintptr_t)ptr & ~PAGE_MASK;
+
+	return artpec6_crypto_dma_map_page(common, page, offset, size, dir,
+					  dma_addr_out);
+}
+
+static int
+artpec6_crypto_dma_map_descs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	int ret;
+
+	ret = artpec6_crypto_dma_map_single(common, dma->in,
+				sizeof(dma->in[0]) * dma->in_cnt,
+				DMA_TO_DEVICE, &dma->in_dma_addr);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_dma_map_single(common, dma->out,
+				sizeof(dma->out[0]) * dma->out_cnt,
+				DMA_TO_DEVICE, &dma->out_dma_addr);
+	if (ret)
+		return ret;
+
+	/* We only read one stat descriptor */
+	dma->stat[dma->in_cnt - 1] = 0;
+
+	/*
+	 * DMA_BIDIRECTIONAL since we need our zeroing of the stat descriptor
+	 * to be written.
+	 */
+	return artpec6_crypto_dma_map_single(common,
+				dma->stat + dma->in_cnt - 1,
+				sizeof(dma->stat[0]),
+				DMA_BIDIRECTIONAL,
+				&dma->stat_dma_addr);
+}
+
+static void
+artpec6_crypto_dma_unmap_all(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct device *dev = artpec6_crypto_dev;
+	int i;
+
+	for (i = 0; i < dma->map_count; i++) {
+		struct artpec6_crypto_dma_map *map = &dma->maps[i];
+
+		dma_unmap_page(dev, map->dma_addr, map->size, map->dir);
+	}
+
+	dma->map_count = 0;
+}
+
+/** artpec6_crypto_setup_out_descr - Setup an out descriptor
+ *
+ * @dst: The virtual address of the data
+ * @len: The length of the data
+ * @eop: True if this is the last buffer in the packet
+ * @use_short: If this is true and the data length is less than 7 bytes then
+ *	a short descriptor will be used
+ *
+ * @return 0 on success
+ *	Any errors from artpec6_crypto_setup_out_descr_short() or
+ *	artpec6_crypto_setup_out_descr_phys()
+ */
+static int
+artpec6_crypto_setup_out_descr(struct artpec6_crypto_req_common *common,
+			       void *dst, unsigned int len, bool eop,
+			       bool use_short)
+{
+	if (use_short && len < 7) {
+		return artpec6_crypto_setup_out_descr_short(common, dst, len,
+							    eop);
+	} else {
+		int ret;
+		dma_addr_t dma_addr;
+
+		ret = artpec6_crypto_dma_map_single(common, dst, len,
+						   DMA_TO_DEVICE,
+						   &dma_addr);
+		if (ret)
+			return ret;
+
+		return artpec6_crypto_setup_out_descr_phys(common, dma_addr,
+							   len, eop);
+	}
+}
+
+/** artpec6_crypto_setup_in_descr_phys - Setup an in channel with a
+ *                                       physical address
+ *
+ * @addr: The physical address of the data buffer
+ * @len:  The length of the data buffer
+ * @intr: True if an interrupt should be fired after HW processing of this
+ *	  descriptor
+ *
+ */
+static int
+artpec6_crypto_setup_in_descr_phys(struct artpec6_crypto_req_common *common,
+			       dma_addr_t addr, unsigned int len, bool intr)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->in_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free IN DMA descriptors available!\n");
+		return -ENOSPC;
+	}
+	d = &dma->in[dma->in_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.intr = intr;
+	d->data.len = len;
+	d->data.buf = addr;
+	return 0;
+}
+
+/** artpec6_crypto_setup_in_descr - Setup an in channel descriptor
+ *
+ * @buffer: The virtual address of the data buffer
+ * @len:    The length of the data buffer
+ * @last:   True if this is the last data buffer in the request (i.e. an
+ *	    interrupt is needed)
+ *
+ * Short descriptors are not used for the in channel
+ */
+static int
+artpec6_crypto_setup_in_descr(struct artpec6_crypto_req_common *common,
+			  void *buffer, unsigned int len, bool last)
+{
+	dma_addr_t dma_addr;
+	int ret;
+
+	ret = artpec6_crypto_dma_map_single(common, buffer, len,
+					   DMA_FROM_DEVICE, &dma_addr);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_setup_in_descr_phys(common, dma_addr, len, last);
+}
+
+static struct artpec6_crypto_bounce_buffer *
+artpec6_crypto_alloc_bounce(gfp_t flags)
+{
+	void *base;
+	size_t alloc_size = sizeof(struct artpec6_crypto_bounce_buffer) +
+			    2 * ARTPEC_CACHE_LINE_MAX;
+	struct artpec6_crypto_bounce_buffer *bbuf = kzalloc(alloc_size, flags);
+
+	if (!bbuf)
+		return NULL;
+
+	base = bbuf + 1;
+	bbuf->buf = PTR_ALIGN(base, ARTPEC_CACHE_LINE_MAX);
+	return bbuf;
+}
+
+static int setup_bounce_buffer_in(struct artpec6_crypto_req_common *common,
+				  struct artpec6_crypto_walk *walk, size_t size)
+{
+	struct artpec6_crypto_bounce_buffer *bbuf;
+	int ret;
+
+	bbuf = artpec6_crypto_alloc_bounce(common->gfp_flags);
+	if (!bbuf)
+		return -ENOMEM;
+
+	bbuf->length = size;
+	bbuf->sg = walk->sg;
+	bbuf->offset = walk->offset;
+
+	ret =  artpec6_crypto_setup_in_descr(common, bbuf->buf, size, false);
+	if (ret) {
+		kfree(bbuf);
+		return ret;
+	}
+
+	pr_debug("BOUNCE %zu offset %zu\n", size, walk->offset);
+	list_add_tail(&bbuf->list, &common->dma->bounce_buffers);
+	return 0;
+}
+
+static int
+artpec6_crypto_setup_sg_descrs_in(struct artpec6_crypto_req_common *common,
+				  struct artpec6_crypto_walk *walk,
+				  size_t count)
+{
+	size_t chunk;
+	int ret;
+	dma_addr_t addr;
+
+	while (walk->sg && count) {
+		chunk = min(count, artpec6_crypto_walk_chunklen(walk));
+		addr = artpec6_crypto_walk_chunk_phys(walk);
+
+		/* When destination buffers are not aligned to the cache line
+		 * size we need bounce buffers. The DMA-API requires that the
+		 * entire cache line is owned by the DMA buffer, and this also
+		 * holds when coherent DMA is used.
+		 */
+		if (!IS_ALIGNED(addr, ARTPEC_CACHE_LINE_MAX)) {
+			chunk = min_t(dma_addr_t, chunk,
+				      ALIGN(addr, ARTPEC_CACHE_LINE_MAX) -
+				      addr);
+
+			pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk);
+			ret = setup_bounce_buffer_in(common, walk, chunk);
+		} else if (chunk < ARTPEC_CACHE_LINE_MAX) {
+			pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk);
+			ret = setup_bounce_buffer_in(common, walk, chunk);
+		} else {
+			dma_addr_t dma_addr;
+
+			chunk = chunk & ~(ARTPEC_CACHE_LINE_MAX-1);
+
+			pr_debug("CHUNK %pad:%zu\n", &addr, chunk);
+
+			ret = artpec6_crypto_dma_map_page(common,
+							 sg_page(walk->sg),
+							 walk->sg->offset +
+							 walk->offset,
+							 chunk,
+							 DMA_FROM_DEVICE,
+							 &dma_addr);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_in_descr_phys(common,
+								 dma_addr,
+								 chunk, false);
+		}
+
+		if (ret)
+			return ret;
+
+		count = count - chunk;
+		artpec6_crypto_walk_advance(walk, chunk);
+	}
+
+	if (count)
+		pr_err("EOL unexpected %zu bytes left\n", count);
+
+	return count ? -EINVAL : 0;
+}
+
+static int
+artpec6_crypto_setup_sg_descrs_out(struct artpec6_crypto_req_common *common,
+				   struct artpec6_crypto_walk *walk,
+				   size_t count)
+{
+	size_t chunk;
+	int ret;
+	dma_addr_t addr;
+
+	while (walk->sg && count) {
+		chunk = min(count, artpec6_crypto_walk_chunklen(walk));
+		addr = artpec6_crypto_walk_chunk_phys(walk);
+
+		pr_debug("OUT-CHUNK %pad:%zu\n", &addr, chunk);
+
+		if (addr & 3) {
+			char buf[3];
+
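+			/* At most 3 bytes are needed to reach the next 4-byte
+			 * boundary; they are copied into a short descriptor,
+			 * which carries its payload inline, instead of being
+			 * DMA mapped.
+			 */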
+			chunk = min_t(size_t, chunk, (4-(addr&3)));
+
+			sg_pcopy_to_buffer(walk->sg, 1, buf, chunk,
+					   walk->offset);
+
+			ret = artpec6_crypto_setup_out_descr_short(common, buf,
+								   chunk,
+								   false);
+		} else {
+			dma_addr_t dma_addr;
+
+			ret = artpec6_crypto_dma_map_page(common,
+							 sg_page(walk->sg),
+							 walk->sg->offset +
+							 walk->offset,
+							 chunk,
+							 DMA_TO_DEVICE,
+							 &dma_addr);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_out_descr_phys(common,
+								 dma_addr,
+								 chunk, false);
+		}
+
+		if (ret)
+			return ret;
+
+		count = count - chunk;
+		artpec6_crypto_walk_advance(walk, chunk);
+	}
+
+	if (count)
+		pr_err("EOL unexpected %zu bytes left\n", count);
+
+	return count ? -EINVAL : 0;
+}
+
+
+/** artpec6_crypto_terminate_out_descrs - Set the EOP on the last out descriptor
+ *
+ * If the out descriptor list is non-empty, then the eop flag on the
+ * last used out descriptor will be set.
+ *
+ * @return  0 on success
+ *	-EINVAL if the out descriptor list is empty or has overflowed
+ */
+static int
+artpec6_crypto_terminate_out_descrs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (!dma->out_cnt || dma->out_cnt > PDMA_DESCR_COUNT) {
+		pr_err("%s: OUT descriptor list is %s\n",
+			MODULE_NAME, dma->out_cnt ? "full" : "empty");
+		return -EINVAL;
+
+	}
+
+	d = &dma->out[dma->out_cnt-1];
+	d->ctrl.eop = 1;
+
+	return 0;
+}
+
+/** artpec6_crypto_terminate_in_descrs - Set the interrupt flag on the last
+ *                                       in descriptor
+ *
+ * See artpec6_crypto_terminate_out_descrs() for return values
+ */
+static int
+artpec6_crypto_terminate_in_descrs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (!dma->in_cnt || dma->in_cnt > PDMA_DESCR_COUNT) {
+		pr_err("%s: IN descriptor list is %s\n",
+			MODULE_NAME, dma->in_cnt ? "full" : "empty");
+		return -EINVAL;
+	}
+
+	d = &dma->in[dma->in_cnt-1];
+	d->ctrl.intr = 1;
+	return 0;
+}
+
+/** create_hash_pad - Create a Secure Hash conformant pad
+ *
+ * @dst:      The destination buffer to write the pad to. Must be large enough
+ *            to hold the longest possible pad (up to 144 bytes for SHA-384/512)
+ * @dgstlen:  The number of message bytes hashed so far, used to compute the
+ *            pad length
+ * @bitcount: The total message length in bits, written into the length field
+ *            of the pad
+ *
+ * @return The total number of padding bytes written to @dst
+ */
+static size_t
+create_hash_pad(int oper, unsigned char *dst, u64 dgstlen, u64 bitcount)
+{
+	unsigned int mod, target, diff, pad_bytes, size_bytes;
+	__be64 bits = __cpu_to_be64(bitcount);
+
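+	/* FIPS 180-4 style padding: the message is padded up to 448 mod 512
+	 * bits (SHA-1/SHA-256) or 896 mod 1024 bits (SHA-384/SHA-512), then
+	 * terminated with a 64- or 128-bit big-endian length field.
+	 */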
+	switch (oper) {
+	case regk_crypto_sha1:
+	case regk_crypto_sha256:
+	case regk_crypto_hmac_sha1:
+	case regk_crypto_hmac_sha256:
+		target = 448 / 8;
+		mod = 512 / 8;
+		size_bytes = 8;
+		break;
+	default:
+		target = 896 / 8;
+		mod = 1024 / 8;
+		size_bytes = 16;
+		break;
+	}
+
+	target -= 1;
+	diff = dgstlen & (mod - 1);
+	pad_bytes = diff > target ? target + mod - diff : target - diff;
+
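+	/* Example (SHA-256): dgstlen == 64 gives diff == 0 and pad_bytes == 55,
+	 * i.e. one full 64-byte pad block: the 0x80 byte, 55 zero bytes and
+	 * the 8-byte bit count.
+	 */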
+	memset(dst + 1, 0, pad_bytes);
+	dst[0] = 0x80;
+
+	if (size_bytes == 16) {
+		memset(dst + 1 + pad_bytes, 0, 8);
+		memcpy(dst + 1 + pad_bytes + 8, &bits, 8);
+	} else {
+		memcpy(dst + 1 + pad_bytes, &bits, 8);
+	}
+
+	return pad_bytes + size_bytes + 1;
+}
+
+static int artpec6_crypto_common_init(struct artpec6_crypto_req_common *common,
+		struct crypto_async_request *parent,
+		void (*complete)(struct crypto_async_request *req),
+		struct scatterlist *dstsg, unsigned int nbytes)
+{
+	gfp_t flags;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+
+	flags = (parent->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		 GFP_KERNEL : GFP_ATOMIC;
+
+	common->gfp_flags = flags;
+	common->dma = kmem_cache_alloc(ac->dma_cache, flags);
+	if (!common->dma)
+		return -ENOMEM;
+
+	common->req = parent;
+	common->complete = complete;
+	return 0;
+}
+
+static void
+artpec6_crypto_bounce_destroy(struct artpec6_crypto_dma_descriptors *dma)
+{
+	struct artpec6_crypto_bounce_buffer *b;
+	struct artpec6_crypto_bounce_buffer *next;
+
+	list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) {
+		kfree(b);
+	}
+}
+
+static int
+artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+
+	artpec6_crypto_dma_unmap_all(common);
+	artpec6_crypto_bounce_destroy(common->dma);
+	kmem_cache_free(ac->dma_cache, common->dma);
+	common->dma = NULL;
+	return 0;
+}
+
+/*
+ * Ciphering functions.
+ */
+static int artpec6_crypto_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	void (*complete)(struct crypto_async_request *req);
+	int ret;
+
+	req_ctx = skcipher_request_ctx(req);
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		req_ctx->decrypt = 0;
+		break;
+	default:
+		break;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		complete = artpec6_crypto_complete_cbc_encrypt;
+		break;
+	default:
+		complete = artpec6_crypto_complete_crypto;
+		break;
+	}
+
+	ret = artpec6_crypto_common_init(&req_ctx->common,
+				  &req->base,
+				  complete,
+				  req->dst, req->cryptlen);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_crypto(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_decrypt(struct skcipher_request *req)
+{
+	int ret;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	void (*complete)(struct crypto_async_request *req);
+
+	req_ctx = skcipher_request_ctx(req);
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		req_ctx->decrypt = 1;
+		break;
+	default:
+		break;
+	}
+
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		complete = artpec6_crypto_complete_cbc_decrypt;
+		break;
+	default:
+		complete = artpec6_crypto_complete_crypto;
+		break;
+	}
+
+	ret = artpec6_crypto_common_init(&req_ctx->common, &req->base,
+				  complete,
+				  req->dst, req->cryptlen);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_crypto(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int
+artpec6_crypto_ctr_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	unsigned int counter = be32_to_cpup((__be32 *)
+					    (req->iv + iv_len - 4));
+	unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) /
+			     AES_BLOCK_SIZE;
+
+	/*
+	 * The hardware uses only the last 32-bits as the counter while the
+	 * kernel tests (aes_ctr_enc_tv_template[4] for example) expect that
+	 * the whole IV is a counter.  So fall back if the counter is going to
+	 * overflow.
+	 */
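+	/* E.g. counter == 0xffffffff with nblks == 2 wraps around, so the
+	 * whole request is handed to the software fallback below.
+	 */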
+	if (counter + nblks < counter) {
+		int ret;
+
+		pr_debug("counter %x will overflow (nblks %u), falling back\n",
+			 counter, nblks);
+
+		ret = crypto_skcipher_setkey(ctx->fallback, ctx->aes_key,
+					     ctx->key_length);
+		if (ret)
+			return ret;
+
+		{
+			SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+
+			skcipher_request_set_tfm(subreq, ctx->fallback);
+			skcipher_request_set_callback(subreq, req->base.flags,
+						      NULL, NULL);
+			skcipher_request_set_crypt(subreq, req->src, req->dst,
+						   req->cryptlen, req->iv);
+			ret = encrypt ? crypto_skcipher_encrypt(subreq)
+				      : crypto_skcipher_decrypt(subreq);
+			skcipher_request_zero(subreq);
+		}
+		return ret;
+	}
+
+	return encrypt ? artpec6_crypto_encrypt(req)
+		       : artpec6_crypto_decrypt(req);
+}
+
+static int artpec6_crypto_ctr_encrypt(struct skcipher_request *req)
+{
+	return artpec6_crypto_ctr_crypt(req, true);
+}
+
+static int artpec6_crypto_ctr_decrypt(struct skcipher_request *req)
+{
+	return artpec6_crypto_ctr_crypt(req, false);
+}
+
+/*
+ * AEAD functions
+ */
+static int artpec6_crypto_aead_init(struct crypto_aead *tfm)
+{
+	struct artpec6_cryptotfm_context *tfm_ctx = crypto_aead_ctx(tfm);
+
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	crypto_aead_set_reqsize(tfm,
+				sizeof(struct artpec6_crypto_aead_req_ctx));
+
+	return 0;
+}
+
+static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key,
+			       unsigned int len)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(&tfm->base);
+
+	if (len != 16 && len != 24 && len != 32) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->key_length = len;
+
+	memcpy(ctx->aes_key, key, len);
+	return 0;
+}
+
+static int artpec6_crypto_aead_encrypt(struct aead_request *req)
+{
+	int ret;
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req);
+
+	req_ctx->decrypt = false;
+	ret = artpec6_crypto_common_init(&req_ctx->common, &req->base,
+				  artpec6_crypto_complete_aead,
+				  NULL, 0);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_aead(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_aead_decrypt(struct aead_request *req)
+{
+	int ret;
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req);
+
+	req_ctx->decrypt = true;
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	ret = artpec6_crypto_common_init(&req_ctx->common,
+				  &req->base,
+				  artpec6_crypto_complete_aead,
+				  NULL, 0);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_aead(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_prepare_hash(struct ahash_request *areq)
+{
+	struct artpec6_hashalg_context *ctx = crypto_tfm_ctx(areq->base.tfm);
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(areq);
+	size_t digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(areq));
+	size_t contextsize = digestsize == SHA384_DIGEST_SIZE ?
+		SHA512_DIGEST_SIZE : digestsize;
+	size_t blocksize = crypto_tfm_alg_blocksize(
+		crypto_ahash_tfm(crypto_ahash_reqtfm(areq)));
+	struct artpec6_crypto_req_common *common = &req_ctx->common;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	u32 sel_ctx;
+	bool ext_ctx = false;
+	bool run_hw = false;
+	int error = 0;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	/* Upload the HMAC key; it must be the first packet */
+	if (req_ctx->hash_flags & HASH_FLAG_HMAC) {
+		if (variant == ARTPEC6_CRYPTO) {
+			req_ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER,
+						     a6_regk_crypto_dlkey);
+		} else {
+			req_ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER,
+						     a7_regk_crypto_dlkey);
+		}
+
+		/* Copy and pad up the key */
+		memcpy(req_ctx->key_buffer, ctx->hmac_key,
+		       ctx->hmac_key_length);
+		memset(req_ctx->key_buffer + ctx->hmac_key_length, 0,
+		       blocksize - ctx->hmac_key_length);
+
+		error = artpec6_crypto_setup_out_descr(common,
+					(void *)&req_ctx->key_md,
+					sizeof(req_ctx->key_md), false, false);
+		if (error)
+			return error;
+
+		error = artpec6_crypto_setup_out_descr(common,
+					req_ctx->key_buffer, blocksize,
+					true, false);
+		if (error)
+			return error;
+	}
+
+	if (!(req_ctx->hash_flags & HASH_FLAG_INIT_CTX)) {
+		/* Restore context */
+		sel_ctx = regk_crypto_ext;
+		ext_ctx = true;
+	} else {
+		sel_ctx = regk_crypto_init;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->hash_md &= ~A6_CRY_MD_HASH_SEL_CTX;
+		req_ctx->hash_md |= FIELD_PREP(A6_CRY_MD_HASH_SEL_CTX, sel_ctx);
+
+		/* If this is the final round, set the final flag */
+		if (req_ctx->hash_flags & HASH_FLAG_FINALIZE)
+			req_ctx->hash_md |= A6_CRY_MD_HASH_HMAC_FIN;
+	} else {
+		req_ctx->hash_md &= ~A7_CRY_MD_HASH_SEL_CTX;
+		req_ctx->hash_md |= FIELD_PREP(A7_CRY_MD_HASH_SEL_CTX, sel_ctx);
+
+		/* If this is the final round, set the final flag */
+		if (req_ctx->hash_flags & HASH_FLAG_FINALIZE)
+			req_ctx->hash_md |= A7_CRY_MD_HASH_HMAC_FIN;
+	}
+
+	/* Set up the metadata descriptors */
+	error = artpec6_crypto_setup_out_descr(common,
+				(void *)&req_ctx->hash_md,
+				sizeof(req_ctx->hash_md), false, false);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (error)
+		return error;
+
+	if (ext_ctx) {
+		error = artpec6_crypto_setup_out_descr(common,
+					req_ctx->digeststate,
+					contextsize, false, false);
+
+		if (error)
+			return error;
+	}
+
+	if (req_ctx->hash_flags & HASH_FLAG_UPDATE) {
+		size_t done_bytes = 0;
+		size_t total_bytes = areq->nbytes + req_ctx->partial_bytes;
+		size_t ready_bytes = round_down(total_bytes, blocksize);
+		struct artpec6_crypto_walk walk;
+
+		run_hw = ready_bytes > 0;
+		if (req_ctx->partial_bytes && ready_bytes) {
+			/* We have a partial buffer and will send at least some
+			 * bytes to the HW. Empty this partial buffer before
+			 * tackling the SG lists.
+			 */
+			memcpy(req_ctx->partial_buffer_out,
+				req_ctx->partial_buffer,
+				req_ctx->partial_bytes);
+
+			error = artpec6_crypto_setup_out_descr(common,
+						req_ctx->partial_buffer_out,
+						req_ctx->partial_bytes,
+						false, true);
+			if (error)
+				return error;
+
+			/* Reset partial buffer */
+			done_bytes += req_ctx->partial_bytes;
+			req_ctx->partial_bytes = 0;
+		}
+
+		artpec6_crypto_walk_init(&walk, areq->src);
+
+		error = artpec6_crypto_setup_sg_descrs_out(common, &walk,
+							   ready_bytes -
+							   done_bytes);
+		if (error)
+			return error;
+
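+		/* Any tail that does not fill a whole block is stashed in the
+		 * partial buffer and fed to the hardware with the next update
+		 * or at finalize.
+		 */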
+		if (walk.sg) {
+			size_t sg_skip = ready_bytes - done_bytes;
+			size_t sg_rem = areq->nbytes - sg_skip;
+
+			sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+					   req_ctx->partial_buffer +
+					   req_ctx->partial_bytes,
+					   sg_rem, sg_skip);
+
+			req_ctx->partial_bytes += sg_rem;
+		}
+
+		req_ctx->digcnt += ready_bytes;
+		req_ctx->hash_flags &= ~(HASH_FLAG_UPDATE);
+	}
+
+	/* Finalize */
+	if (req_ctx->hash_flags & HASH_FLAG_FINALIZE) {
+		bool needtrim = contextsize != digestsize;
+		size_t hash_pad_len;
+		u64 digest_bits;
+		u32 oper;
+
+		if (variant == ARTPEC6_CRYPTO)
+			oper = FIELD_GET(A6_CRY_MD_OPER, req_ctx->hash_md);
+		else
+			oper = FIELD_GET(A7_CRY_MD_OPER, req_ctx->hash_md);
+
+		/* Write out the partial buffer if present */
+		if (req_ctx->partial_bytes) {
+			memcpy(req_ctx->partial_buffer_out,
+			       req_ctx->partial_buffer,
+			       req_ctx->partial_bytes);
+			error = artpec6_crypto_setup_out_descr(common,
+						req_ctx->partial_buffer_out,
+						req_ctx->partial_bytes,
+						false, true);
+			if (error)
+				return error;
+
+			req_ctx->digcnt += req_ctx->partial_bytes;
+			req_ctx->partial_bytes = 0;
+		}
+
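+		/* For HMAC the length field must also cover the initial key
+		 * block processed by the hardware, which digcnt does not
+		 * include; hence the extra blocksize.
+		 */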
+		if (req_ctx->hash_flags & HASH_FLAG_HMAC)
+			digest_bits = 8 * (req_ctx->digcnt + blocksize);
+		else
+			digest_bits = 8 * req_ctx->digcnt;
+
+		/* Add the hash pad */
+		hash_pad_len = create_hash_pad(oper, req_ctx->pad_buffer,
+					       req_ctx->digcnt, digest_bits);
+		error = artpec6_crypto_setup_out_descr(common,
+						      req_ctx->pad_buffer,
+						      hash_pad_len, false,
+						      true);
+		req_ctx->digcnt = 0;
+
+		if (error)
+			return error;
+
+		/* Descriptor for the final result */
+		error = artpec6_crypto_setup_in_descr(common, areq->result,
+						      digestsize,
+						      !needtrim);
+		if (error)
+			return error;
+
+		if (needtrim) {
+			/* Discard the extra context bytes for SHA-384 */
+			error = artpec6_crypto_setup_in_descr(common,
+					req_ctx->partial_buffer,
+					digestsize - contextsize, true);
+			if (error)
+				return error;
+		}
+
+	} else { /* This is not the final operation for this request */
+		if (!run_hw)
+			return ARTPEC6_CRYPTO_PREPARE_HASH_NO_START;
+
+		/* Save the result to the context */
+		error = artpec6_crypto_setup_in_descr(common,
+						      req_ctx->digeststate,
+						      contextsize, false);
+		if (error)
+			return error;
+		/* fall through */
+	}
+
+	req_ctx->hash_flags &= ~(HASH_FLAG_INIT_CTX | HASH_FLAG_UPDATE |
+				 HASH_FLAG_FINALIZE);
+
+	error = artpec6_crypto_terminate_in_descrs(common);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_terminate_out_descrs(common);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_dma_map_descs(common);
+	if (error)
+		return error;
+
+	return ARTPEC6_CRYPTO_PREPARE_HASH_START;
+}
+
+
+static int artpec6_crypto_aes_ecb_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_ECB;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_ctr_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base),
+					      0,
+					      CRYPTO_ALG_ASYNC |
+					      CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback))
+		return PTR_ERR(ctx->fallback);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CTR;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_cbc_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CBC;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_xts_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_XTS;
+
+	return 0;
+}
+
+static void artpec6_crypto_aes_exit(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static void artpec6_crypto_aes_ctr_exit(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->fallback);
+	artpec6_crypto_aes_exit(tfm);
+}
+
+static int
+artpec6_crypto_cipher_set_key(struct crypto_skcipher *cipher, const u8 *key,
+			      unsigned int keylen)
+{
+	struct artpec6_cryptotfm_context *ctx =
+		crypto_skcipher_ctx(cipher);
+
+	switch (keylen) {
+	case 16:
+	case 24:
+	case 32:
+		break;
+	default:
+		crypto_skcipher_set_flags(cipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->aes_key, key, keylen);
+	ctx->key_length = keylen;
+	return 0;
+}
+
+static int
+artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key,
+			      unsigned int keylen)
+{
+	struct artpec6_cryptotfm_context *ctx =
+		crypto_skcipher_ctx(cipher);
+	int ret;
+
+	ret = xts_check_key(&cipher->base, key, keylen);
+	if (ret)
+		return ret;
+
+	switch (keylen) {
+	case 32:
+	case 48:
+	case 64:
+		break;
+	default:
+		crypto_skcipher_set_flags(cipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->aes_key, key, keylen);
+	ctx->key_length = keylen;
+	return 0;
+}
+
+/** artpec6_crypto_prepare_crypto - Prepare an async block cipher crypto request
+ *
+ * @areq: The asynchronous skcipher request to process
+ *
+ * @return 0 if the dma job was successfully prepared
+ *	  <0 on error
+ *
+ * This function sets up the PDMA descriptors for a block cipher request.
+ *
+ * The required padding for AES-CTR and AES-XTS is added using a statically
+ * defined buffer.
+ *
+ * The PDMA descriptor list will be as follows:
+ *
+ * OUT: [KEY_MD][KEY][EOP]<CIPHER_MD>[IV]<data_0>...[data_n][AES-CTR_pad]<eop>
+ * IN:  <CIPHER_MD><data_0>...[data_n]<intr>
+ *
+ */
+static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq)
+{
+	int ret;
+	struct artpec6_crypto_walk walk;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	struct artpec6_crypto_req_common *common;
+	bool cipher_decr = false;
+	size_t cipher_klen;
+	u32 cipher_len = 0; /* Same as regk_crypto_key_128 for NULL crypto */
+	u32 oper;
+
+	req_ctx = skcipher_request_ctx(areq);
+	common = &req_ctx->common;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	if (variant == ARTPEC6_CRYPTO)
+		ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER, a6_regk_crypto_dlkey);
+	else
+		ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER, a7_regk_crypto_dlkey);
+
+	ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md,
+					     sizeof(ctx->key_md), false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key,
+					      ctx->key_length, true, false);
+	if (ret)
+		return ret;
+
+	req_ctx->cipher_md = 0;
+
+	if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS)
+		cipher_klen = ctx->key_length/2;
+	else
+		cipher_klen = ctx->key_length;
+
+	/* Metadata */
+	switch (cipher_klen) {
+	case 16:
+		cipher_len = regk_crypto_key_128;
+		break;
+	case 24:
+		cipher_len = regk_crypto_key_192;
+		break;
+	case 32:
+		cipher_len = regk_crypto_key_256;
+		break;
+	default:
+		pr_err("%s: Invalid key length %d!\n",
+			MODULE_NAME, ctx->key_length);
+		return -EINVAL;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+		oper = regk_crypto_aes_ecb;
+		cipher_decr = req_ctx->decrypt;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		oper = regk_crypto_aes_cbc;
+		cipher_decr = req_ctx->decrypt;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_CTR:
+		oper = regk_crypto_aes_ctr;
+		cipher_decr = false;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		oper = regk_crypto_aes_xts;
+		cipher_decr = req_ctx->decrypt;
+
+		if (variant == ARTPEC6_CRYPTO)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DSEQ;
+		else
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DSEQ;
+		break;
+
+	default:
+		pr_err("%s: Invalid cipher mode %d!\n",
+			MODULE_NAME, ctx->crypto_type);
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER, oper);
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN,
+						 cipher_len);
+		if (cipher_decr)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR;
+	} else {
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER, oper);
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN,
+						 cipher_len);
+		if (cipher_decr)
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR;
+	}
+
+	ret = artpec6_crypto_setup_out_descr(common,
+					    &req_ctx->cipher_md,
+					    sizeof(req_ctx->cipher_md),
+					    false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (ret)
+		return ret;
+
+	if (iv_len) {
+		ret = artpec6_crypto_setup_out_descr(common, areq->iv, iv_len,
+						     false, false);
+		if (ret)
+			return ret;
+	}
+	/* Data out */
+	artpec6_crypto_walk_init(&walk, areq->src);
+	ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, areq->cryptlen);
+	if (ret)
+		return ret;
+
+	/* Data in */
+	artpec6_crypto_walk_init(&walk, areq->dst);
+	ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, areq->cryptlen);
+	if (ret)
+		return ret;
+
+	/* CTR and XTS mode padding required by the HW. */
+	if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_CTR ||
+	    ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS) {
+		size_t pad = ALIGN(areq->cryptlen, AES_BLOCK_SIZE) -
+			     areq->cryptlen;
+
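+		/* E.g. cryptlen == 20 gives pad == 12, so the hardware always
+		 * sees whole 16-byte blocks; the padded bytes received on the
+		 * IN side are discarded into pad_buffer.
+		 */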
+		if (pad) {
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->pad_buffer,
+							     pad, false, false);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_in_descr(common,
+							    ac->pad_buffer, pad,
+							    false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	ret = artpec6_crypto_terminate_out_descrs(common);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_terminate_in_descrs(common);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_dma_map_descs(common);
+}
+
+static int artpec6_crypto_prepare_aead(struct aead_request *areq)
+{
+	size_t count;
+	int ret;
+	size_t input_length;
+	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(areq->base.tfm);
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq);
+	struct crypto_aead *cipher = crypto_aead_reqtfm(areq);
+	struct artpec6_crypto_req_common *common = &req_ctx->common;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	u32 md_cipher_len;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	/* Key */
+	if (variant == ARTPEC6_CRYPTO) {
+		ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER,
+					 a6_regk_crypto_dlkey);
+	} else {
+		ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER,
+					 a7_regk_crypto_dlkey);
+	}
+	ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md,
+					     sizeof(ctx->key_md), false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key,
+					     ctx->key_length, true, false);
+	if (ret)
+		return ret;
+
+	req_ctx->cipher_md = 0;
+
+	switch (ctx->key_length) {
+	case 16:
+		md_cipher_len = regk_crypto_key_128;
+		break;
+	case 24:
+		md_cipher_len = regk_crypto_key_192;
+		break;
+	case 32:
+		md_cipher_len = regk_crypto_key_256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER,
+						 regk_crypto_aes_gcm);
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN,
+						 md_cipher_len);
+		if (req_ctx->decrypt)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR;
+	} else {
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER,
+						 regk_crypto_aes_gcm);
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN,
+						 md_cipher_len);
+		if (req_ctx->decrypt)
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR;
+	}
+
+	ret = artpec6_crypto_setup_out_descr(common,
+					    (void *) &req_ctx->cipher_md,
+					    sizeof(req_ctx->cipher_md), false,
+					    false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (ret)
+		return ret;
+
+	/* For the decryption, cryptlen includes the tag. */
+	input_length = areq->cryptlen;
+	if (req_ctx->decrypt)
+		input_length -= AES_BLOCK_SIZE;
+
+	/* Prepare the context buffer */
+	req_ctx->hw_ctx.aad_length_bits =
+		__cpu_to_be64(8*areq->assoclen);
+
+	req_ctx->hw_ctx.text_length_bits =
+		__cpu_to_be64(8*input_length);
+
+	memcpy(req_ctx->hw_ctx.J0, areq->iv, crypto_aead_ivsize(cipher));
+	/* The HW omits the initial increment of the counter field. */
+	crypto_inc(req_ctx->hw_ctx.J0+12, 4);
+
+	ret = artpec6_crypto_setup_out_descr(common, &req_ctx->hw_ctx,
+		sizeof(struct artpec6_crypto_aead_hw_ctx), false, false);
+	if (ret)
+		return ret;
+
+	{
+		struct artpec6_crypto_walk walk;
+
+		artpec6_crypto_walk_init(&walk, areq->src);
+
+		/* Associated data */
+		count = areq->assoclen;
+		ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count);
+		if (ret)
+			return ret;
+
+		if (!IS_ALIGNED(areq->assoclen, 16)) {
+			size_t assoc_pad = 16 - (areq->assoclen % 16);
+			/* The HW mandates zero padding here */
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->zero_buffer,
+							     assoc_pad, false,
+							     false);
+			if (ret)
+				return ret;
+		}
+
+		/* Data to crypto */
+		count = input_length;
+		ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count);
+		if (ret)
+			return ret;
+
+		if (!IS_ALIGNED(input_length, 16)) {
+			size_t crypto_pad = 16 - (input_length % 16);
+			/* The HW mandates zero padding here */
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->zero_buffer,
+							     crypto_pad,
+							     false,
+							     false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Data from crypto */
+	{
+		struct artpec6_crypto_walk walk;
+		size_t output_len = areq->cryptlen;
+
+		if (req_ctx->decrypt)
+			output_len -= AES_BLOCK_SIZE;
+
+		artpec6_crypto_walk_init(&walk, areq->dst);
+
+		/* skip associated data in the output */
+		count = artpec6_crypto_walk_advance(&walk, areq->assoclen);
+		if (count)
+			return -EINVAL;
+
+		count = output_len;
+		ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, count);
+		if (ret)
+			return ret;
+
+		/* Put padding between the ciphertext and the auth tag */
+		if (!IS_ALIGNED(output_len, 16)) {
+			size_t crypto_pad = 16 - (output_len % 16);
+
+			ret = artpec6_crypto_setup_in_descr(common,
+							    ac->pad_buffer,
+							    crypto_pad, false);
+			if (ret)
+				return ret;
+		}
+
+		/* The authentication tag shall follow immediately after
+		 * the output ciphertext. For decryption it is put in a context
+		 * buffer for a later comparison against the input tag.
+		 */
+		count = AES_BLOCK_SIZE;
+
+		if (req_ctx->decrypt) {
+			ret = artpec6_crypto_setup_in_descr(common,
+				req_ctx->decryption_tag, count, false);
+			if (ret)
+				return ret;
+
+		} else {
+			ret = artpec6_crypto_setup_sg_descrs_in(common, &walk,
+								count);
+			if (ret)
+				return ret;
+		}
+
+	}
+
+	ret = artpec6_crypto_terminate_in_descrs(common);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_terminate_out_descrs(common);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_dma_map_descs(common);
+}
+
+static void artpec6_crypto_process_queue(struct artpec6_crypto *ac)
+{
+	struct artpec6_crypto_req_common *req;
+
+	while (!list_empty(&ac->queue) && !artpec6_crypto_busy()) {
+		req = list_first_entry(&ac->queue,
+				       struct artpec6_crypto_req_common,
+				       list);
+		list_move_tail(&req->list, &ac->pending);
+		artpec6_crypto_start_dma(req);
+
+		req->req->complete(req->req, -EINPROGRESS);
+	}
+
+	/*
+	 * In some cases, the hardware can raise an in_eop_flush interrupt
+	 * before actually updating the status, so we have a timer which will
+	 * recheck the status on timeout.  Since the cases are expected to be
+	 * very rare, we use a relatively large timeout value.  There should be
+	 * no noticeable negative effect if we timeout spuriously.
+	 */
+	if (ac->pending_count)
+		mod_timer(&ac->timer, jiffies + msecs_to_jiffies(100));
+	else
+		del_timer(&ac->timer);
+}
+
+static void artpec6_crypto_timeout(unsigned long data)
+{
+	struct artpec6_crypto *ac = (struct artpec6_crypto *) data;
+
+	dev_info_ratelimited(artpec6_crypto_dev, "timeout\n");
+
+	tasklet_schedule(&ac->task);
+}
+
+static void artpec6_crypto_task(unsigned long data)
+{
+	struct artpec6_crypto *ac = (struct artpec6_crypto *)data;
+	struct artpec6_crypto_req_common *req;
+	struct artpec6_crypto_req_common *n;
+
+	if (list_empty(&ac->pending)) {
+		pr_debug("Spurious IRQ\n");
+		return;
+	}
+
+	spin_lock_bh(&ac->queue_lock);
+
+	list_for_each_entry_safe(req, n, &ac->pending, list) {
+		struct artpec6_crypto_dma_descriptors *dma = req->dma;
+		u32 stat;
+
+		dma_sync_single_for_cpu(artpec6_crypto_dev, dma->stat_dma_addr,
+					sizeof(dma->stat[0]),
+					DMA_BIDIRECTIONAL);
+
+		stat = req->dma->stat[req->dma->in_cnt-1];
+
+		/* A non-zero final status descriptor indicates
+		 * this job has finished.
+		 */
+		pr_debug("Request %p status is %X\n", req, stat);
+		if (!stat)
+			break;
+
+		/* Allow testing of timeout handling with fault injection */
+#ifdef CONFIG_FAULT_INJECTION
+		if (should_fail(&artpec6_crypto_fail_status_read, 1))
+			continue;
+#endif
+
+		pr_debug("Completing request %p\n", req);
+
+		list_del(&req->list);
+
+		artpec6_crypto_dma_unmap_all(req);
+		artpec6_crypto_copy_bounce_buffers(req);
+
+		ac->pending_count--;
+		artpec6_crypto_common_destroy(req);
+		req->complete(req->req);
+	}
+
+	artpec6_crypto_process_queue(ac);
+
+	spin_unlock_bh(&ac->queue_lock);
+}
+
+static void artpec6_crypto_complete_crypto(struct crypto_async_request *req)
+{
+	req->complete(req, 0);
+}
+
+static void
+artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req)
+{
+	struct skcipher_request *cipher_req = container_of(req,
+		struct skcipher_request, base);
+
+	scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src,
+				 cipher_req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	req->complete(req, 0);
+}
+
+static void
+artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req)
+{
+	struct skcipher_request *cipher_req = container_of(req,
+		struct skcipher_request, base);
+
+	scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst,
+				 cipher_req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	req->complete(req, 0);
+}
+
+static void artpec6_crypto_complete_aead(struct crypto_async_request *req)
+{
+	int result = 0;
+
+	/* Verify GCM hashtag. */
+	struct aead_request *areq = container_of(req,
+		struct aead_request, base);
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq);
+
+	if (req_ctx->decrypt) {
+		u8 input_tag[AES_BLOCK_SIZE];
+
+		sg_pcopy_to_buffer(areq->src,
+				   sg_nents(areq->src),
+				   input_tag,
+				   AES_BLOCK_SIZE,
+				   areq->assoclen + areq->cryptlen -
+				   AES_BLOCK_SIZE);
+
+		if (memcmp(req_ctx->decryption_tag,
+			   input_tag,
+			   AES_BLOCK_SIZE)) {
+			pr_debug("***EBADMSG:\n");
+			print_hex_dump_debug("ref:", DUMP_PREFIX_ADDRESS, 32, 1,
+					     input_tag, AES_BLOCK_SIZE, true);
+			print_hex_dump_debug("out:", DUMP_PREFIX_ADDRESS, 32, 1,
+					     req_ctx->decryption_tag,
+					     AES_BLOCK_SIZE, true);
+
+			result = -EBADMSG;
+		}
+	}
+
+	req->complete(req, result);
+}
+
+static void artpec6_crypto_complete_hash(struct crypto_async_request *req)
+{
+	req->complete(req, 0);
+}
+
+
+/*------------------- Hash functions -----------------------------------------*/
+static int
+artpec6_crypto_hash_set_key(struct crypto_ahash *tfm,
+		    const u8 *key, unsigned int keylen)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(&tfm->base);
+	size_t blocksize;
+	int ret;
+
+	if (!keylen) {
+		pr_err("Invalid length (%d) of HMAC key\n",
+			keylen);
+		return -EINVAL;
+	}
+
+	memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key));
+
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	if (keylen > blocksize) {
+		SHASH_DESC_ON_STACK(hdesc, tfm_ctx->child_hash);
+
+		hdesc->tfm = tfm_ctx->child_hash;
+		hdesc->flags = crypto_ahash_get_flags(tfm) &
+			       CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		tfm_ctx->hmac_key_length = blocksize;
+		ret = crypto_shash_digest(hdesc, key, keylen,
+					  tfm_ctx->hmac_key);
+		if (ret)
+			return ret;
+
+	} else {
+		memcpy(tfm_ctx->hmac_key, key, keylen);
+		tfm_ctx->hmac_key_length = keylen;
+	}
+
+	return 0;
+}
+
+static int
+artpec6_crypto_init_hash(struct ahash_request *req, u8 type, int hmac)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+	u32 oper;
+
+	memset(req_ctx, 0, sizeof(*req_ctx));
+
+	req_ctx->hash_flags = HASH_FLAG_INIT_CTX;
+	if (hmac)
+		req_ctx->hash_flags |= (HASH_FLAG_HMAC | HASH_FLAG_UPDATE_KEY);
+
+	switch (type) {
+	case ARTPEC6_CRYPTO_HASH_SHA1:
+		oper = hmac ? regk_crypto_hmac_sha1 : regk_crypto_sha1;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA256:
+		oper = hmac ? regk_crypto_hmac_sha256 : regk_crypto_sha256;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA384:
+		oper = hmac ? regk_crypto_hmac_sha384 : regk_crypto_sha384;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA512:
+		oper = hmac ? regk_crypto_hmac_sha512 : regk_crypto_sha512;
+		break;
+
+	default:
+		pr_err("%s: Unsupported hash type 0x%x\n", MODULE_NAME, type);
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO)
+		req_ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, oper);
+	else
+		req_ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, oper);
+
+	return 0;
+}
+
+static int artpec6_crypto_prepare_submit_hash(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+	int ret;
+
+	if (!req_ctx->common.dma) {
+		ret = artpec6_crypto_common_init(&req_ctx->common,
+					  &req->base,
+					  artpec6_crypto_complete_hash,
+					  NULL, 0);
+
+		if (ret)
+			return ret;
+	}
+
+	ret = artpec6_crypto_prepare_hash(req);
+	switch (ret) {
+	case ARTPEC6_CRYPTO_PREPARE_HASH_START:
+		ret = artpec6_crypto_submit(&req_ctx->common);
+		break;
+
+	case ARTPEC6_CRYPTO_PREPARE_HASH_NO_START:
+		ret = 0;
+		/* Fallthrough */
+
+	default:
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		break;
+	}
+
+	return ret;
+}
+
+static int artpec6_crypto_hash_final(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	req_ctx->hash_flags |= HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hash_update(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha1_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0);
+}
+
+static int artpec6_crypto_sha1_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0);
+
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha256_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0);
+}
+
+static int artpec6_crypto_sha256_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int __maybe_unused artpec6_crypto_sha384_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0);
+}
+
+static int __maybe_unused
+artpec6_crypto_sha384_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha512_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0);
+}
+
+static int artpec6_crypto_sha512_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hmac_sha256_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1);
+}
+
+static int __maybe_unused
+artpec6_crypto_hmac_sha384_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1);
+}
+
+static int artpec6_crypto_hmac_sha512_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1);
+}
+
+static int artpec6_crypto_hmac_sha256_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int __maybe_unused
+artpec6_crypto_hmac_sha384_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hmac_sha512_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_ahash_init_common(struct crypto_tfm *tfm,
+				    const char *base_hash_name)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct artpec6_hash_request_context));
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	if (base_hash_name) {
+		struct crypto_shash *child;
+
+		child = crypto_alloc_shash(base_hash_name, 0,
+					   CRYPTO_ALG_NEED_FALLBACK);
+
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		tfm_ctx->child_hash = child;
+	}
+
+	return 0;
+}
+
+static int artpec6_crypto_ahash_init(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, NULL);
+}
+
+static int artpec6_crypto_ahash_init_hmac_sha256(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha256");
+}
+
+static int __maybe_unused
+artpec6_crypto_ahash_init_hmac_sha384(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha384");
+}
+
+static int artpec6_crypto_ahash_init_hmac_sha512(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha512");
+}
+
+static void artpec6_crypto_ahash_exit(struct crypto_tfm *tfm)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm);
+
+	if (tfm_ctx->child_hash)
+		crypto_free_shash(tfm_ctx->child_hash);
+
+	memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key));
+	tfm_ctx->hmac_key_length = 0;
+}
+
+static int artpec6_crypto_hash_export(struct ahash_request *req, void *out)
+{
+	const struct artpec6_hash_request_context *ctx = ahash_request_ctx(req);
+	struct artpec6_hash_export_state *state = out;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+
+	BUILD_BUG_ON(sizeof(state->partial_buffer) !=
+		     sizeof(ctx->partial_buffer));
+	BUILD_BUG_ON(sizeof(state->digeststate) != sizeof(ctx->digeststate));
+
+	state->digcnt = ctx->digcnt;
+	state->partial_bytes = ctx->partial_bytes;
+	state->hash_flags = ctx->hash_flags;
+
+	if (variant == ARTPEC6_CRYPTO)
+		state->oper = FIELD_GET(A6_CRY_MD_OPER, ctx->hash_md);
+	else
+		state->oper = FIELD_GET(A7_CRY_MD_OPER, ctx->hash_md);
+
+	memcpy(state->partial_buffer, ctx->partial_buffer,
+	       sizeof(state->partial_buffer));
+	memcpy(state->digeststate, ctx->digeststate,
+	       sizeof(state->digeststate));
+
+	return 0;
+}
+
+static int artpec6_crypto_hash_import(struct ahash_request *req, const void *in)
+{
+	struct artpec6_hash_request_context *ctx = ahash_request_ctx(req);
+	const struct artpec6_hash_export_state *state = in;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->digcnt = state->digcnt;
+	ctx->partial_bytes = state->partial_bytes;
+	ctx->hash_flags = state->hash_flags;
+
+	if (variant == ARTPEC6_CRYPTO)
+		ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, state->oper);
+	else
+		ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, state->oper);
+
+	memcpy(ctx->partial_buffer, state->partial_buffer,
+	       sizeof(state->partial_buffer));
+	memcpy(ctx->digeststate, state->digeststate,
+	       sizeof(state->digeststate));
+
+	return 0;
+}
+
+static int init_crypto_hw(struct artpec6_crypto *ac)
+{
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	u32 out_descr_buf_size;
+	u32 out_data_buf_size;
+	u32 in_data_buf_size;
+	u32 in_descr_buf_size;
+	u32 in_stat_buf_size;
+	u32 in, out;
+
+	/*
+	 * The PDMA unit contains 1984 bytes of internal memory for the OUT
+	 * channels and 1024 bytes for the IN channel. This is an elastic
+	 * memory used to internally store the descriptors and data. The values
+	 * are specified in 64-byte increments.  TrustZone buffers are not
+	 * used at this stage.
+	 */
+	out_data_buf_size = 16;  /* 1024 bytes for data */
+	out_descr_buf_size = 15; /* 960 bytes for descriptors */
+	in_data_buf_size = 8;    /* 512 bytes for data */
+	in_descr_buf_size = 4;   /* 256 bytes for descriptors */
+	in_stat_buf_size = 4;   /* 256 bytes for stat descrs */
+
+	BUILD_BUG_ON_MSG((out_data_buf_size
+				+ out_descr_buf_size) * 64 > 1984,
+			  "Invalid OUT configuration");
+
+	BUILD_BUG_ON_MSG((in_data_buf_size
+				+ in_descr_buf_size
+				+ in_stat_buf_size) * 64 > 1024,
+			  "Invalid IN configuration");
+
+	in = FIELD_PREP(PDMA_IN_BUF_CFG_DATA_BUF_SIZE, in_data_buf_size) |
+	     FIELD_PREP(PDMA_IN_BUF_CFG_DESCR_BUF_SIZE, in_descr_buf_size) |
+	     FIELD_PREP(PDMA_IN_BUF_CFG_STAT_BUF_SIZE, in_stat_buf_size);
+
+	out = FIELD_PREP(PDMA_OUT_BUF_CFG_DATA_BUF_SIZE, out_data_buf_size) |
+	      FIELD_PREP(PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE, out_descr_buf_size);
+
+	writel_relaxed(out, base + PDMA_OUT_BUF_CFG);
+	writel_relaxed(PDMA_OUT_CFG_EN, base + PDMA_OUT_CFG);
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(in, base + A6_PDMA_IN_BUF_CFG);
+		writel_relaxed(PDMA_IN_CFG_EN, base + A6_PDMA_IN_CFG);
+		writel_relaxed(A6_PDMA_INTR_MASK_IN_DATA |
+			       A6_PDMA_INTR_MASK_IN_EOP_FLUSH,
+			       base + A6_PDMA_INTR_MASK);
+	} else {
+		writel_relaxed(in, base + A7_PDMA_IN_BUF_CFG);
+		writel_relaxed(PDMA_IN_CFG_EN, base + A7_PDMA_IN_CFG);
+		writel_relaxed(A7_PDMA_INTR_MASK_IN_DATA |
+			       A7_PDMA_INTR_MASK_IN_EOP_FLUSH,
+			       base + A7_PDMA_INTR_MASK);
+	}
+
+	return 0;
+}
+
+static void artpec6_crypto_disable_hw(struct artpec6_crypto *ac)
+{
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(A6_PDMA_IN_CMD_STOP, base + A6_PDMA_IN_CMD);
+		writel_relaxed(0, base + A6_PDMA_IN_CFG);
+		writel_relaxed(A6_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD);
+	} else {
+		writel_relaxed(A7_PDMA_IN_CMD_STOP, base + A7_PDMA_IN_CMD);
+		writel_relaxed(0, base + A7_PDMA_IN_CFG);
+		writel_relaxed(A7_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD);
+	}
+
+	writel_relaxed(0, base + PDMA_OUT_CFG);
+}
+
+static irqreturn_t artpec6_crypto_irq(int irq, void *dev_id)
+{
+	struct artpec6_crypto *ac = dev_id;
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	u32 mask_in_data, mask_in_eop_flush;
+	u32 in_cmd_flush_stat, in_cmd_reg;
+	u32 ack_intr_reg;
+	u32 ack = 0;
+	u32 intr;
+
+	if (variant == ARTPEC6_CRYPTO) {
+		intr = readl_relaxed(base + A6_PDMA_MASKED_INTR);
+		mask_in_data = A6_PDMA_INTR_MASK_IN_DATA;
+		mask_in_eop_flush = A6_PDMA_INTR_MASK_IN_EOP_FLUSH;
+		in_cmd_flush_stat = A6_PDMA_IN_CMD_FLUSH_STAT;
+		in_cmd_reg = A6_PDMA_IN_CMD;
+		ack_intr_reg = A6_PDMA_ACK_INTR;
+	} else {
+		intr = readl_relaxed(base + A7_PDMA_MASKED_INTR);
+		mask_in_data = A7_PDMA_INTR_MASK_IN_DATA;
+		mask_in_eop_flush = A7_PDMA_INTR_MASK_IN_EOP_FLUSH;
+		in_cmd_flush_stat = A7_PDMA_IN_CMD_FLUSH_STAT;
+		in_cmd_reg = A7_PDMA_IN_CMD;
+		ack_intr_reg = A7_PDMA_ACK_INTR;
+	}
+
+	/* We get two interrupt notifications from each job.
+	 * The in_data interrupt means that all data has been written to
+	 * memory; we then request a status flush command to write the
+	 * per-job status to its status vector. This ensures that the
+	 * tasklet can detect exactly how many of the submitted jobs
+	 * have finished.
+	 */
+	if (intr & mask_in_data)
+		ack |= mask_in_data;
+
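+	/* If this interrupt did not signal an eop_flush, request a status
+	 * flush; the eop_flush interrupt that follows will then find the
+	 * per-job status written out and schedule the tasklet.
+	 */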
+	if (intr & mask_in_eop_flush)
+		ack |= mask_in_eop_flush;
+	else
+		writel_relaxed(in_cmd_flush_stat, base + in_cmd_reg);
+
+	writel_relaxed(ack, base + ack_intr_reg);
+
+	if (intr & mask_in_eop_flush)
+		tasklet_schedule(&ac->task);
+
+	return IRQ_HANDLED;
+}
+
+/*------------------- Algorithm definitions ----------------------------------*/
+
+/* Hashes */
+static struct ahash_alg hash_algos[] = {
+	/* SHA-1 */
+	{
+		.init = artpec6_crypto_sha1_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha1_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA1_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha1",
+			.cra_driver_name = "artpec-sha1",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA1_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* SHA-256 */
+	{
+		.init = artpec6_crypto_sha256_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha256_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA256_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha256",
+			.cra_driver_name = "artpec-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-256 */
+	{
+		.init = artpec6_crypto_hmac_sha256_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha256_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA256_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha256)",
+			.cra_driver_name = "artpec-hmac-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha256,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+};
+
+static struct ahash_alg artpec7_hash_algos[] = {
+	/* SHA-384 */
+	{
+		.init = artpec6_crypto_sha384_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha384_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA384_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha384",
+			.cra_driver_name = "artpec-sha384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-384 */
+	{
+		.init = artpec6_crypto_hmac_sha384_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha384_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA384_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha384)",
+			.cra_driver_name = "artpec-hmac-sha384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha384,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* SHA-512 */
+	{
+		.init = artpec6_crypto_sha512_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha512_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA512_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha512",
+			.cra_driver_name = "artpec-sha512",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-512 */
+	{
+		.init = artpec6_crypto_hmac_sha512_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha512_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA512_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha512)",
+			.cra_driver_name = "artpec-hmac-sha512",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha512,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+};
+
+/* Crypto */
+static struct skcipher_alg crypto_algos[] = {
+	/* AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "artpec6-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_ecb_init,
+		.exit = artpec6_crypto_aes_exit,
+	},
+	/* AES - CTR */
+	{
+		.base = {
+			.cra_name = "ctr(aes)",
+			.cra_driver_name = "artpec6-ctr-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_ctr_encrypt,
+		.decrypt = artpec6_crypto_ctr_decrypt,
+		.init = artpec6_crypto_aes_ctr_init,
+		.exit = artpec6_crypto_aes_ctr_exit,
+	},
+	/* AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "artpec6-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_cbc_init,
+		.exit = artpec6_crypto_aes_exit
+	},
+	/* AES - XTS */
+	{
+		.base = {
+			.cra_name = "xts(aes)",
+			.cra_driver_name = "artpec6-xts-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = 2*AES_MIN_KEY_SIZE,
+		.max_keysize = 2*AES_MAX_KEY_SIZE,
+		.ivsize = 16,
+		.setkey = artpec6_crypto_xts_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_xts_init,
+		.exit = artpec6_crypto_aes_exit,
+	},
+};
+
+static struct aead_alg aead_algos[] = {
+	{
+		.init   = artpec6_crypto_aead_init,
+		.setkey = artpec6_crypto_aead_set_key,
+		.encrypt = artpec6_crypto_aead_encrypt,
+		.decrypt = artpec6_crypto_aead_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+
+		.base = {
+			.cra_name = "gcm(aes)",
+			.cra_driver_name = "artpec-gcm-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+	}
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+struct dbgfs_u32 {
+	char *name;
+	mode_t mode;
+	u32 *flag;
+	char *desc;
+};
+
+static void artpec6_crypto_init_debugfs(void)
+{
+	dbgfs_root = debugfs_create_dir("artpec6_crypto", NULL);
+
+	if (!dbgfs_root || IS_ERR(dbgfs_root)) {
+		dbgfs_root = NULL;
+		pr_err("%s: Could not initialise debugfs!\n", MODULE_NAME);
+		return;
+	}
+
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("fail_status_read", dbgfs_root,
+				  &artpec6_crypto_fail_status_read);
+
+	fault_create_debugfs_attr("fail_dma_array_full", dbgfs_root,
+				  &artpec6_crypto_fail_dma_array_full);
+#endif
+}
+
+static void artpec6_crypto_free_debugfs(void)
+{
+	if (!dbgfs_root)
+		return;
+
+	debugfs_remove_recursive(dbgfs_root);
+	dbgfs_root = NULL;
+}
+#endif
+
+static const struct of_device_id artpec6_crypto_of_match[] = {
+	{ .compatible = "axis,artpec6-crypto", .data = (void *)ARTPEC6_CRYPTO },
+	{ .compatible = "axis,artpec7-crypto", .data = (void *)ARTPEC7_CRYPTO },
+	{}
+};
+MODULE_DEVICE_TABLE(of, artpec6_crypto_of_match);
+
+static int artpec6_crypto_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	enum artpec6_crypto_variant variant;
+	struct artpec6_crypto *ac;
+	struct device *dev = &pdev->dev;
+	void __iomem *base;
+	struct resource *res;
+	int irq;
+	int err;
+
+	if (artpec6_crypto_dev)
+		return -ENODEV;
+
+	match = of_match_node(artpec6_crypto_of_match, dev->of_node);
+	if (!match)
+		return -EINVAL;
+
+	variant = (enum artpec6_crypto_variant)match->data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+
+	ac = devm_kzalloc(&pdev->dev, sizeof(struct artpec6_crypto),
+			  GFP_KERNEL);
+	if (!ac)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ac);
+	ac->variant = variant;
+
+	spin_lock_init(&ac->queue_lock);
+	INIT_LIST_HEAD(&ac->queue);
+	INIT_LIST_HEAD(&ac->pending);
+	setup_timer(&ac->timer, artpec6_crypto_timeout, (unsigned long) ac);
+
+	ac->base = base;
+
+	ac->dma_cache = kmem_cache_create("artpec6_crypto_dma",
+		sizeof(struct artpec6_crypto_dma_descriptors),
+		64,
+		0,
+		NULL);
+	if (!ac->dma_cache)
+		return -ENOMEM;
+
+#ifdef CONFIG_DEBUG_FS
+	artpec6_crypto_init_debugfs();
+#endif
+
+	tasklet_init(&ac->task, artpec6_crypto_task,
+		     (unsigned long)ac);
+
+	ac->pad_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX,
+				      GFP_KERNEL);
+	if (!ac->pad_buffer)
+		return -ENOMEM;
+	ac->pad_buffer = PTR_ALIGN(ac->pad_buffer, ARTPEC_CACHE_LINE_MAX);
+
+	ac->zero_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX,
+				      GFP_KERNEL);
+	if (!ac->zero_buffer)
+		return -ENOMEM;
+	ac->zero_buffer = PTR_ALIGN(ac->zero_buffer, ARTPEC_CACHE_LINE_MAX);
+
+	err = init_crypto_hw(ac);
+	if (err)
+		goto free_cache;
+
+	err = devm_request_irq(&pdev->dev, irq, artpec6_crypto_irq, 0,
+			       "artpec6-crypto", ac);
+	if (err)
+		goto disable_hw;
+
+	artpec6_crypto_dev = &pdev->dev;
+
+	err = crypto_register_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+	if (err) {
+		dev_err(dev, "Failed to register ahashes\n");
+		goto disable_hw;
+	}
+
+	if (variant != ARTPEC6_CRYPTO) {
+		err = crypto_register_ahashes(artpec7_hash_algos,
+					      ARRAY_SIZE(artpec7_hash_algos));
+		if (err) {
+			dev_err(dev, "Failed to register ahashes\n");
+			goto unregister_ahashes;
+		}
+	}
+
+	err = crypto_register_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+	if (err) {
+		dev_err(dev, "Failed to register ciphers\n");
+		goto unregister_a7_ahashes;
+	}
+
+	err = crypto_register_aeads(aead_algos, ARRAY_SIZE(aead_algos));
+	if (err) {
+		dev_err(dev, "Failed to register aeads\n");
+		goto unregister_algs;
+	}
+
+	return 0;
+
+unregister_algs:
+	crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+unregister_a7_ahashes:
+	if (variant != ARTPEC6_CRYPTO)
+		crypto_unregister_ahashes(artpec7_hash_algos,
+					  ARRAY_SIZE(artpec7_hash_algos));
+unregister_ahashes:
+	crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+disable_hw:
+	artpec6_crypto_disable_hw(ac);
+free_cache:
+	kmem_cache_destroy(ac->dma_cache);
+	return err;
+}
+
+static int artpec6_crypto_remove(struct platform_device *pdev)
+{
+	struct artpec6_crypto *ac = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+	if (ac->variant != ARTPEC6_CRYPTO)
+		crypto_unregister_ahashes(artpec7_hash_algos,
+					  ARRAY_SIZE(artpec7_hash_algos));
+	crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+	crypto_unregister_aeads(aead_algos, ARRAY_SIZE(aead_algos));
+
+	tasklet_disable(&ac->task);
+	devm_free_irq(&pdev->dev, irq, ac);
+	tasklet_kill(&ac->task);
+	del_timer_sync(&ac->timer);
+
+	artpec6_crypto_disable_hw(ac);
+
+	kmem_cache_destroy(ac->dma_cache);
+#ifdef CONFIG_DEBUG_FS
+	artpec6_crypto_free_debugfs();
+#endif
+	return 0;
+}
+
+static struct platform_driver artpec6_crypto_driver = {
+	.probe   = artpec6_crypto_probe,
+	.remove  = artpec6_crypto_remove,
+	.driver  = {
+		.name  = "artpec6-crypto",
+		.owner = THIS_MODULE,
+		.of_match_table = artpec6_crypto_of_match,
+	},
+};
+
+module_platform_driver(artpec6_crypto_driver);
+
+MODULE_AUTHOR("Axis Communications AB");
+MODULE_DESCRIPTION("ARTPEC-6 Crypto driver");
+MODULE_LICENSE("GPL");
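Editor's note: the driver above is only reachable through the generic crypto API; below is a minimal, hedged sketch (not part of the patch — the example_* names are invented) of how a kernel caller could reach the "sha512" implementation it registers. Buffers placed in the scatterlist must be DMA-able (not on the stack), and the digest buffer must hold SHA512_DIGEST_SIZE bytes.

#include <crypto/hash.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

struct example_wait {
	struct completion done;
	int err;
};

static void example_hash_done(struct crypto_async_request *areq, int err)
{
	struct example_wait *wait = areq->data;

	/* a backlogged request signals -EINPROGRESS first; wait for the real completion */
	if (err == -EINPROGRESS)
		return;
	wait->err = err;
	complete(&wait->done);
}

static int example_sha512(const void *msg, unsigned int len, u8 *digest)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	struct example_wait wait;
	int err;

	/* cra_priority 300 lets "artpec-sha512" win over sha512-generic when loaded */
	tfm = crypto_alloc_ahash("sha512", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}

	init_completion(&wait.done);
	sg_init_one(&sg, msg, len);
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   example_hash_done, &wait);
	ahash_request_set_crypt(req, &sg, digest, len);

	err = crypto_ahash_digest(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&wait.done);
		err = wait.err;
	}

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}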

+ 54 - 60
drivers/crypto/bcm/cipher.c

@@ -90,8 +90,6 @@ static int aead_pri = 150;
 module_param(aead_pri, int, 0644);
 MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos");
 
-#define MAX_SPUS 16
-
 /* A type 3 BCM header, expected to precede the SPU header for SPU-M.
  * Bits 3 and 4 in the first byte encode the channel number (the dma ringset).
  * 0x60 - ring 0
@@ -120,7 +118,7 @@ static u8 select_channel(void)
 {
 	u8 chan_idx = atomic_inc_return(&iproc_priv.next_chan);
 
-	return chan_idx % iproc_priv.spu.num_spu;
+	return chan_idx % iproc_priv.spu.num_chan;
 }
 
 /**
@@ -4528,8 +4526,13 @@ static void spu_functions_register(struct device *dev,
  */
 static int spu_mb_init(struct device *dev)
 {
-	struct mbox_client *mcl = &iproc_priv.mcl[iproc_priv.spu.num_spu];
-	int err;
+	struct mbox_client *mcl = &iproc_priv.mcl;
+	int err, i;
+
+	iproc_priv.mbox = devm_kcalloc(dev, iproc_priv.spu.num_chan,
+				  sizeof(struct mbox_chan *), GFP_KERNEL);
+	if (!iproc_priv.mbox)
+		return -ENOMEM;
 
 
 	mcl->dev = dev;
 	mcl->tx_block = false;
@@ -4538,25 +4541,33 @@ static int spu_mb_init(struct device *dev)
 	mcl->rx_callback = spu_rx_callback;
 	mcl->tx_done = NULL;
 
-	iproc_priv.mbox[iproc_priv.spu.num_spu] =
-			mbox_request_channel(mcl, 0);
-	if (IS_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu])) {
-		err = (int)PTR_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu]);
-		dev_err(dev,
-			"Mbox channel %d request failed with err %d",
-			iproc_priv.spu.num_spu, err);
-		iproc_priv.mbox[iproc_priv.spu.num_spu] = NULL;
-		return err;
+	for (i = 0; i < iproc_priv.spu.num_chan; i++) {
+		iproc_priv.mbox[i] = mbox_request_channel(mcl, i);
+		if (IS_ERR(iproc_priv.mbox[i])) {
+			err = (int)PTR_ERR(iproc_priv.mbox[i]);
+			dev_err(dev,
+				"Mbox channel %d request failed with err %d",
+				i, err);
+			iproc_priv.mbox[i] = NULL;
+			goto free_channels;
+		}
 	}
 
 	return 0;
+free_channels:
+	for (i = 0; i < iproc_priv.spu.num_chan; i++) {
+		if (iproc_priv.mbox[i])
+			mbox_free_channel(iproc_priv.mbox[i]);
+	}
+
+	return err;
 }
 
 static void spu_mb_release(struct platform_device *pdev)
 {
 	int i;
 
-	for (i = 0; i < iproc_priv.spu.num_spu; i++)
+	for (i = 0; i < iproc_priv.spu.num_chan; i++)
 		mbox_free_channel(iproc_priv.mbox[i]);
 }
 
@@ -4567,7 +4578,7 @@ static void spu_counters_init(void)
 
 
 	atomic_set(&iproc_priv.session_count, 0);
 	atomic_set(&iproc_priv.stream_count, 0);
-	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_spu);
+	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_chan);
 	atomic64_set(&iproc_priv.bytes_in, 0);
 	atomic64_set(&iproc_priv.bytes_out, 0);
 	for (i = 0; i < SPU_OP_NUM; i++) {
@@ -4809,47 +4820,38 @@ static int spu_dt_read(struct platform_device *pdev)
 	struct resource *spu_ctrl_regs;
 	const struct of_device_id *match;
 	const struct spu_type_subtype *matched_spu_type;
-	void __iomem *spu_reg_vbase[MAX_SPUS];
-	int err;
+	struct device_node *dn = pdev->dev.of_node;
+	int err, i;
 
 
-	match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev);
-	matched_spu_type = match->data;
+	/* Count number of mailbox channels */
+	spu->num_chan = of_count_phandle_with_args(dn, "mboxes", "#mbox-cells");
 
 
-	if (iproc_priv.spu.num_spu > 1) {
-		/* If this is 2nd or later SPU, make sure it's same type */
-		if ((spu->spu_type != matched_spu_type->type) ||
-		    (spu->spu_subtype != matched_spu_type->subtype)) {
-			err = -EINVAL;
-			dev_err(&pdev->dev, "Multiple SPU types not allowed");
-			return err;
-		}
-	} else {
-		/* Record type of first SPU */
-		spu->spu_type = matched_spu_type->type;
-		spu->spu_subtype = matched_spu_type->subtype;
+	match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Failed to match device\n");
+		return -ENODEV;
 	}
 
-	/* Get and map SPU registers */
-	spu_ctrl_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!spu_ctrl_regs) {
-		err = -EINVAL;
-		dev_err(&pdev->dev, "Invalid/missing registers for SPU\n");
-		return err;
-	}
+	matched_spu_type = match->data;
 
 
-	spu_reg_vbase[iproc_priv.spu.num_spu] =
-				devm_ioremap_resource(dev, spu_ctrl_regs);
-	if (IS_ERR(spu_reg_vbase[iproc_priv.spu.num_spu])) {
-		err = PTR_ERR(spu_reg_vbase[iproc_priv.spu.num_spu]);
-		dev_err(&pdev->dev, "Failed to map registers: %d\n",
-			err);
-		spu_reg_vbase[iproc_priv.spu.num_spu] = NULL;
-		return err;
-	}
+	spu->spu_type = matched_spu_type->type;
+	spu->spu_subtype = matched_spu_type->subtype;
 
 
-	dev_dbg(dev, "SPU %d detected.", iproc_priv.spu.num_spu);
+	i = 0;
+	for (i = 0; (i < MAX_SPUS) && ((spu_ctrl_regs =
+		platform_get_resource(pdev, IORESOURCE_MEM, i)) != NULL); i++) {
 
 
-	spu->reg_vbase[iproc_priv.spu.num_spu] = spu_reg_vbase;
+		spu->reg_vbase[i] = devm_ioremap_resource(dev, spu_ctrl_regs);
+		if (IS_ERR(spu->reg_vbase[i])) {
+			err = PTR_ERR(spu->reg_vbase[i]);
+			dev_err(&pdev->dev, "Failed to map registers: %d\n",
+				err);
+			spu->reg_vbase[i] = NULL;
+			return err;
+		}
+	}
+	spu->num_spu = i;
+	dev_dbg(dev, "Device has %d SPUs", spu->num_spu);
 
 
 	return 0;
 }
@@ -4860,8 +4862,8 @@ int bcm_spu_probe(struct platform_device *pdev)
 	struct spu_hw *spu = &iproc_priv.spu;
 	int err = 0;
 
-	iproc_priv.pdev[iproc_priv.spu.num_spu] = pdev;
-	platform_set_drvdata(iproc_priv.pdev[iproc_priv.spu.num_spu],
+	iproc_priv.pdev  = pdev;
+	platform_set_drvdata(iproc_priv.pdev,
 			     &iproc_priv);
 
 	err = spu_dt_read(pdev);
@@ -4872,12 +4874,6 @@ int bcm_spu_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto failure;
 
-	iproc_priv.spu.num_spu++;
-
-	/* If already initialized, we've just added another SPU and are done */
-	if (iproc_priv.inited)
-		return 0;
-
 	if (spu->spu_type == SPU_TYPE_SPUM)
 		iproc_priv.bcm_hdr_len = 8;
 	else if (spu->spu_type == SPU_TYPE_SPU2)
@@ -4893,8 +4889,6 @@ int bcm_spu_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto fail_reg;
 
-	iproc_priv.inited = true;
-
 	return 0;
 
 fail_reg:
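Editor's note: the net effect of the select_channel() change above is that requests now round-robin over however many mailbox channels the device tree declares instead of over probed SPU instances. A tiny illustration (hypothetical helper, not part of the patch), e.g. num_chan == 4 gives the channel sequence 1, 2, 3, 0, 1, ...:

static u8 example_pick_channel(atomic_t *next_chan, u32 num_chan)
{
	/* atomic_inc_return() makes the counter safe against concurrent callers */
	return atomic_inc_return(next_chan) % num_chan;
}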

+ 7 - 6
drivers/crypto/bcm/cipher.h

@@ -427,10 +427,13 @@ struct spu_hw {
 
 
 	/* The number of SPUs on this platform */
 	u32 num_spu;
 	u32 num_spu;
+
+	/* The number of SPU channels on this platform */
+	u32 num_chan;
 };
 };
 
 
 struct device_private {
 struct device_private {
-	struct platform_device *pdev[MAX_SPUS];
+	struct platform_device *pdev;
 
 
 	struct spu_hw spu;
 	struct spu_hw spu;
 
 
@@ -470,12 +473,10 @@ struct device_private {
 	/* Number of ICV check failures for AEAD messages */
 	/* Number of ICV check failures for AEAD messages */
 	atomic_t bad_icv;
 	atomic_t bad_icv;
 
 
-	struct mbox_client mcl[MAX_SPUS];
-	/* Array of mailbox channel pointers, one for each channel */
-	struct mbox_chan *mbox[MAX_SPUS];
+	struct mbox_client mcl;
 
 
-	/* Driver initialized */
-	bool inited;
+	/* Array of mailbox channel pointers, one for each channel */
+	struct mbox_chan **mbox;
 };
 };
 
 
 extern struct device_private iproc_priv;
 extern struct device_private iproc_priv;

+ 15 - 51
drivers/crypto/caam/caamalg.c

@@ -81,40 +81,6 @@
 #define debug(format, arg...)
 #define debug(format, arg...)
 #endif
 #endif
 
 
-#ifdef DEBUG
-#include <linux/highmem.h>
-
-static void dbg_dump_sg(const char *level, const char *prefix_str,
-			int prefix_type, int rowsize, int groupsize,
-			struct scatterlist *sg, size_t tlen, bool ascii)
-{
-	struct scatterlist *it;
-	void *it_page;
-	size_t len;
-	void *buf;
-
-	for (it = sg; it != NULL && tlen > 0 ; it = sg_next(sg)) {
-		/*
-		 * make sure the scatterlist's page
-		 * has a valid virtual memory mapping
-		 */
-		it_page = kmap_atomic(sg_page(it));
-		if (unlikely(!it_page)) {
-			printk(KERN_ERR "dbg_dump_sg: kmap failed\n");
-			return;
-		}
-
-		buf = it_page + it->offset;
-		len = min_t(size_t, tlen, it->length);
-		print_hex_dump(level, prefix_str, prefix_type, rowsize,
-			       groupsize, buf, len, ascii);
-		tlen -= len;
-
-		kunmap_atomic(it_page);
-	}
-}
-#endif
-
 static struct list_head alg_list;
 static struct list_head alg_list;
 
 
 struct caam_alg_entry {
 struct caam_alg_entry {
@@ -898,10 +864,10 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 #endif
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 
 	ablkcipher_unmap(jrdev, edesc, req);
 	ablkcipher_unmap(jrdev, edesc, req);
 
 
@@ -937,10 +903,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
 		       ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 #endif
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 
 	ablkcipher_unmap(jrdev, edesc, req);
 	ablkcipher_unmap(jrdev, edesc, req);
 
 
@@ -1107,10 +1073,10 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 		       ivsize, 1);
 		       ivsize, 1);
 	pr_err("asked=%d, nbytes%d\n",
 	pr_err("asked=%d, nbytes%d\n",
 	       (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes);
 	       (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes);
-	dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 #endif
+	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 
 
 	len = desc_len(sh_desc);
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
@@ -1164,10 +1130,10 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
 	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
 	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
 		       ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 #endif
+	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 
 
 	len = desc_len(sh_desc);
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
@@ -1449,11 +1415,9 @@ static int aead_decrypt(struct aead_request *req)
 	u32 *desc;
 	u32 *desc;
 	int ret = 0;
 	int ret = 0;
 
 
-#ifdef DEBUG
-	dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    req->assoclen + req->cryptlen, 1);
-#endif
+	caam_dump_sg(KERN_ERR, "dec src@" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     req->assoclen + req->cryptlen, 1);
 
 
 	/* allocate extended descriptor */
 	/* allocate extended descriptor */
 	edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
 	edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,

+ 2 - 3
drivers/crypto/caam/caamalg_desc.c

@@ -599,7 +599,7 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 
 
 	/* skip key loading if they are loaded due to sharing */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD | JUMP_COND_SELF);
+				   JUMP_COND_SHRD);
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
 				  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
@@ -688,8 +688,7 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 
 
 	/* skip key loading if they are loaded due to sharing */
 	key_jump_cmd = append_jump(desc, JUMP_JSL |
-				   JUMP_TEST_ALL | JUMP_COND_SHRD |
-				   JUMP_COND_SELF);
+				   JUMP_TEST_ALL | JUMP_COND_SHRD);
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
 				  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);

+ 47 - 8
drivers/crypto/caam/caamalg_qi.c

@@ -12,7 +12,6 @@
 #include "intern.h"
 #include "intern.h"
 #include "desc_constr.h"
 #include "desc_constr.h"
 #include "error.h"
 #include "error.h"
-#include "sg_sw_sec4.h"
 #include "sg_sw_qm.h"
 #include "sg_sw_qm.h"
 #include "key_gen.h"
 #include "key_gen.h"
 #include "qi.h"
 #include "qi.h"
@@ -399,6 +398,7 @@ badkey:
  * @iv_dma: dma address of iv for checking continuity and link table
  * @iv_dma: dma address of iv for checking continuity and link table
  * @qm_sg_bytes: length of dma mapped h/w link table
  * @qm_sg_bytes: length of dma mapped h/w link table
  * @qm_sg_dma: bus physical mapped address of h/w link table
  * @qm_sg_dma: bus physical mapped address of h/w link table
+ * @assoclen: associated data length, in CAAM endianness
  * @assoclen_dma: bus physical mapped address of req->assoclen
  * @assoclen_dma: bus physical mapped address of req->assoclen
  * @drv_req: driver-specific request structure
  * @drv_req: driver-specific request structure
  * @sgt: the h/w link table
  * @sgt: the h/w link table
@@ -409,8 +409,12 @@ struct aead_edesc {
 	dma_addr_t iv_dma;
 	dma_addr_t iv_dma;
 	int qm_sg_bytes;
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
 	dma_addr_t qm_sg_dma;
+	unsigned int assoclen;
 	dma_addr_t assoclen_dma;
 	dma_addr_t assoclen_dma;
 	struct caam_drv_req drv_req;
 	struct caam_drv_req drv_req;
+#define CAAM_QI_MAX_AEAD_SG						\
+	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) /	\
+	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 	struct qm_sg_entry sgt[0];
 };
 };
 
 
@@ -431,6 +435,9 @@ struct ablkcipher_edesc {
 	int qm_sg_bytes;
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
 	dma_addr_t qm_sg_dma;
 	struct caam_drv_req drv_req;
 	struct caam_drv_req drv_req;
+#define CAAM_QI_MAX_ABLKCIPHER_SG					    \
+	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \
+	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 	struct qm_sg_entry sgt[0];
 };
 };
 
 
@@ -660,6 +667,14 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	 */
 	 */
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, op_type, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
 	sg_table = &edesc->sgt[0];
 	sg_table = &edesc->sgt[0];
 	qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 	qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 
 
@@ -670,7 +685,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	edesc->drv_req.cbk = aead_done;
 	edesc->drv_req.cbk = aead_done;
 	edesc->drv_req.drv_ctx = drv_ctx;
 	edesc->drv_req.drv_ctx = drv_ctx;
 
 
-	edesc->assoclen_dma = dma_map_single(qidev, &req->assoclen, 4,
+	edesc->assoclen = cpu_to_caam32(req->assoclen);
+	edesc->assoclen_dma = dma_map_single(qidev, &edesc->assoclen, 4,
 					     DMA_TO_DEVICE);
 					     DMA_TO_DEVICE);
 	if (dma_mapping_error(qidev, edesc->assoclen_dma)) {
 	if (dma_mapping_error(qidev, edesc->assoclen_dma)) {
 		dev_err(qidev, "unable to map assoclen\n");
 		dev_err(qidev, "unable to map assoclen\n");
@@ -776,9 +792,9 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *caam_ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct caam_ctx *caam_ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = caam_ctx->qidev;
 	struct device *qidev = caam_ctx->qidev;
-#ifdef DEBUG
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
 
+#ifdef DEBUG
 	dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
 	dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
 #endif
 #endif
 
 
@@ -791,14 +807,21 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	print_hex_dump(KERN_ERR, "dstiv  @" __stringify(__LINE__)": ",
 	print_hex_dump(KERN_ERR, "dstiv  @" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 #endif
 
 
 	ablkcipher_unmap(qidev, edesc, req);
 	ablkcipher_unmap(qidev, edesc, req);
 	qi_cache_free(edesc);
 	qi_cache_free(edesc);
 
 
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block. This is used e.g. by the CTS mode.
+	 */
+	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
+				 ivsize, 0);
+
 	ablkcipher_request_complete(req, status);
 	ablkcipher_request_complete(req, status);
 }
 }
 
 
@@ -880,6 +903,15 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	}
 	}
 	dst_sg_idx = qm_sg_ents;
 	dst_sg_idx = qm_sg_ents;
 
 
+	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, op_type, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* allocate space for base edesc and link tables */
 	/* allocate space for base edesc and link tables */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (unlikely(!edesc)) {
 	if (unlikely(!edesc)) {
@@ -892,7 +924,6 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	edesc->src_nents = src_nents;
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
 	edesc->iv_dma = iv_dma;
-	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	sg_table = &edesc->sgt[0];
 	sg_table = &edesc->sgt[0];
 	edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 	edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 	edesc->drv_req.app_ctx = req;
 	edesc->drv_req.app_ctx = req;
@@ -1026,6 +1057,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 		qm_sg_ents += 1 + mapped_dst_nents;
 		qm_sg_ents += 1 + mapped_dst_nents;
 	}
 	}
 
 
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* allocate space for base edesc and link tables */
 	/* allocate space for base edesc and link tables */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (!edesc) {
 	if (!edesc) {
@@ -1968,7 +2007,7 @@ static struct caam_aead_alg driver_aeads[] = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(des)))",
 					    "cbc(des)))",
 				.cra_driver_name = "echainiv-authenc-"
 				.cra_driver_name = "echainiv-authenc-"
-						   "hmac-sha256-cbc-desi-"
+						   "hmac-sha256-cbc-des-"
 						   "caam-qi",
 						   "caam-qi",
 				.cra_blocksize = DES_BLOCK_SIZE,
 				.cra_blocksize = DES_BLOCK_SIZE,
 			},
 			},

+ 3 - 4
drivers/crypto/caam/caamhash.c

@@ -791,8 +791,8 @@ static int ahash_update_ctx(struct ahash_request *req)
 							 to_hash - *buflen,
 							 *next_buflen, 0);
 		} else {
-			(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
-				cpu_to_caam32(SEC4_SG_LEN_FIN);
+			sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
+					    1);
 		}
 
 		desc = edesc->hw_desc;
@@ -882,8 +882,7 @@ static int ahash_final_ctx(struct ahash_request *req)
 	if (ret)
 		goto unmap_ctx;
 
-	(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
-		cpu_to_caam32(SEC4_SG_LEN_FIN);
+	sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1);
 
 
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);

+ 1 - 5
drivers/crypto/caam/caamrng.c

@@ -285,11 +285,7 @@ static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
 	if (err)
 		return err;
 
-	err = caam_init_buf(ctx, 1);
-	if (err)
-		return err;
-
-	return 0;
+	return caam_init_buf(ctx, 1);
 }
 
 static struct hwrng caam_rng = {

+ 59 - 68
drivers/crypto/caam/ctrl.c

@@ -17,6 +17,8 @@
 
 
 bool caam_little_end;
 bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
 EXPORT_SYMBOL(caam_little_end);
+bool caam_dpaa2;
+EXPORT_SYMBOL(caam_dpaa2);
 
 
 #ifdef CONFIG_CAAM_QI
 #ifdef CONFIG_CAAM_QI
 #include "qi.h"
 #include "qi.h"
@@ -319,8 +321,11 @@ static int caam_remove(struct platform_device *pdev)
 		caam_qi_shutdown(ctrlpriv->qidev);
 		caam_qi_shutdown(ctrlpriv->qidev);
 #endif
 #endif
 
 
-	/* De-initialize RNG state handles initialized by this driver. */
-	if (ctrlpriv->rng4_sh_init)
+	/*
+	 * De-initialize RNG state handles initialized by this driver.
+	 * In case of DPAA 2.x, RNG is managed by MC firmware.
+	 */
+	if (!caam_dpaa2 && ctrlpriv->rng4_sh_init)
 		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
 		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
 
 
 	/* Shut down debug views */
 	/* Shut down debug views */
@@ -444,7 +449,6 @@ static int caam_probe(struct platform_device *pdev)
 
 
 	dev = &pdev->dev;
 	dev = &pdev->dev;
 	dev_set_drvdata(dev, ctrlpriv);
 	dev_set_drvdata(dev, ctrlpriv);
-	ctrlpriv->pdev = pdev;
 	nprop = pdev->dev.of_node;
 	nprop = pdev->dev.of_node;
 
 
 	/* Enable clocking */
 	/* Enable clocking */
@@ -553,12 +557,17 @@ static int caam_probe(struct platform_device *pdev)
 
 
 	/*
 	/*
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
-	 * long pointers in master configuration register
+	 * long pointers in master configuration register.
+	 * In case of DPAA 2.x, Management Complex firmware performs
+	 * the configuration.
 	 */
 	 */
-	clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
-		      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
-		      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
-		      (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
+	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
+	if (!caam_dpaa2)
+		clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
+			      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
+			      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
+			      (sizeof(dma_addr_t) == sizeof(u64) ?
+			       MCFGR_LONG_PTR : 0));
 
 
 	/*
 	/*
 	 *  Read the Compile Time paramters and SCFGR to determine
 	 *  Read the Compile Time paramters and SCFGR to determine
@@ -587,7 +596,9 @@ static int caam_probe(struct platform_device *pdev)
 			      JRSTART_JR3_START);
 			      JRSTART_JR3_START);
 
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
+		if (caam_dpaa2)
+			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(49));
+		else if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
 		else
 		else
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
@@ -630,11 +641,9 @@ static int caam_probe(struct platform_device *pdev)
 			ring++;
 			ring++;
 		}
 		}
 
 
-	/* Check to see if QI present. If so, enable */
-	ctrlpriv->qi_present =
-			!!(rd_reg32(&ctrl->perfmon.comp_parms_ms) &
-			   CTPR_MS_QI_MASK);
-	if (ctrlpriv->qi_present) {
+	/* Check to see if (DPAA 1.x) QI present. If so, enable */
+	ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
+	if (ctrlpriv->qi_present && !caam_dpaa2) {
 		ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
 		ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
 			       ((__force uint8_t *)ctrl +
 			       ((__force uint8_t *)ctrl +
 				 BLOCK_OFFSET * QI_BLOCK_NUMBER
 				 BLOCK_OFFSET * QI_BLOCK_NUMBER
@@ -662,8 +671,10 @@ static int caam_probe(struct platform_device *pdev)
 	/*
 	/*
 	 * If SEC has RNG version >= 4 and RNG state handle has not been
 	 * If SEC has RNG version >= 4 and RNG state handle has not been
 	 * already instantiated, do RNG instantiation
 	 * already instantiated, do RNG instantiation
+	 * In case of DPAA 2.x, RNG is managed by MC firmware.
 	 */
 	 */
-	if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
+	if (!caam_dpaa2 &&
+	    (cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
 		ctrlpriv->rng4_sh_init =
 		ctrlpriv->rng4_sh_init =
 			rd_reg32(&ctrl->r4tst[0].rdsta);
 			rd_reg32(&ctrl->r4tst[0].rdsta);
 		/*
 		/*
@@ -731,63 +742,43 @@ static int caam_probe(struct platform_device *pdev)
 	/* Report "alive" for developer to see */
 	/* Report "alive" for developer to see */
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
 		 caam_get_era());
 		 caam_get_era());
-	dev_info(dev, "job rings = %d, qi = %d\n",
-		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
+	dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
+		 ctrlpriv->total_jobrs, ctrlpriv->qi_present,
+		 caam_dpaa2 ? "yes" : "no");
 
 
 #ifdef CONFIG_DEBUG_FS
 #ifdef CONFIG_DEBUG_FS
-
-	ctrlpriv->ctl_rq_dequeued =
-		debugfs_create_file("rq_dequeued",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->req_dequeued,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_enc_req =
-		debugfs_create_file("ob_rq_encrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_enc_req,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_dec_req =
-		debugfs_create_file("ib_rq_decrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_dec_req,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_enc_bytes =
-		debugfs_create_file("ob_bytes_encrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_enc_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_prot_bytes =
-		debugfs_create_file("ob_bytes_protected",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_prot_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_dec_bytes =
-		debugfs_create_file("ib_bytes_decrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_dec_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_valid_bytes =
-		debugfs_create_file("ib_bytes_validated",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_valid_bytes,
-				    &caam_fops_u64_ro);
+	debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->req_dequeued,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_rq_encrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_enc_req,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_rq_decrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_dec_req,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_bytes_encrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_enc_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_bytes_protected", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_prot_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_bytes_decrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_dec_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_bytes_validated", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_valid_bytes,
+			    &caam_fops_u64_ro);
 
 
 	/* Controller level - global status values */
 	/* Controller level - global status values */
-	ctrlpriv->ctl_faultaddr =
-		debugfs_create_file("fault_addr",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->faultaddr,
-				    &caam_fops_u32_ro);
-	ctrlpriv->ctl_faultdetail =
-		debugfs_create_file("fault_detail",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->faultdetail,
-				    &caam_fops_u32_ro);
-	ctrlpriv->ctl_faultstatus =
-		debugfs_create_file("fault_status",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->status,
-				    &caam_fops_u32_ro);
+	debugfs_create_file("fault_addr", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->faultaddr,
+			    &caam_fops_u32_ro);
+	debugfs_create_file("fault_detail", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->faultdetail,
+			    &caam_fops_u32_ro);
+	debugfs_create_file("fault_status", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->status,
+			    &caam_fops_u32_ro);
 
 
 	/* Internal covering keys (useful in non-secure mode only) */
 	/* Internal covering keys (useful in non-secure mode only) */
 	ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0];
 	ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0];

+ 2 - 0
drivers/crypto/caam/ctrl.h

@@ -10,4 +10,6 @@
 /* Prototypes for backend-level services exposed to APIs */
 int caam_get_era(void);
 
+extern bool caam_dpaa2;
+
 #endif /* CTRL_H */

+ 40 - 0
drivers/crypto/caam/error.c

@@ -9,6 +9,46 @@
 #include "desc.h"
 #include "desc.h"
 #include "error.h"
 #include "error.h"
 
 
+#ifdef DEBUG
+#include <linux/highmem.h>
+
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii)
+{
+	struct scatterlist *it;
+	void *it_page;
+	size_t len;
+	void *buf;
+
+	for (it = sg; it && tlen > 0 ; it = sg_next(sg)) {
+		/*
+		 * make sure the scatterlist's page
+		 * has a valid virtual memory mapping
+		 */
+		it_page = kmap_atomic(sg_page(it));
+		if (unlikely(!it_page)) {
+			pr_err("caam_dump_sg: kmap failed\n");
+			return;
+		}
+
+		buf = it_page + it->offset;
+		len = min_t(size_t, tlen, it->length);
+		print_hex_dump(level, prefix_str, prefix_type, rowsize,
+			       groupsize, buf, len, ascii);
+		tlen -= len;
+
+		kunmap_atomic(it_page);
+	}
+}
+#else
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii)
+{}
+#endif /* DEBUG */
+EXPORT_SYMBOL(caam_dump_sg);
+
 static const struct {
 	u8 value;
 	const char *error_text;

+ 4 - 0
drivers/crypto/caam/error.h

@@ -8,4 +8,8 @@
 #define CAAM_ERROR_H
 #define CAAM_ERROR_STR_MAX 302
 void caam_jr_strstatus(struct device *jrdev, u32 status);
+
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii);
 #endif /* CAAM_ERROR_H */
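Editor's note: the prototype added above centralises the scatterlist dumping that caamalg.c and caamalg_qi.c previously open-coded. A hedged usage sketch follows (the example_ wrapper is invented; the helper only produces output when the CAAM drivers are built with DEBUG defined, otherwise the stub compiles to an empty function):

static void example_dump_dst(struct aead_request *req, int dst_nents)
{
	/* mirrors the converted call sites: cap S/G dumps at 100 bytes */
	caam_dump_sg(KERN_ERR, "dst @example: ", DUMP_PREFIX_ADDRESS, 16, 4,
		     req->dst, dst_nents > 1 ? 100 : req->cryptlen, true);
}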

+ 0 - 11
drivers/crypto/caam/intern.h

@@ -64,12 +64,9 @@ struct caam_drv_private_jr {
  * Driver-private storage for a single CAAM block instance
  */
 struct caam_drv_private {
-
-	struct device *dev;
 #ifdef CONFIG_CAAM_QI
 	struct device *qidev;
 #endif
-	struct platform_device *pdev;
 
 
 	/* Physical-presence section */
 	struct caam_ctrl __iomem *ctrl; /* controller region */
@@ -105,16 +102,8 @@ struct caam_drv_private {
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dfs_root;
 	struct dentry *ctl; /* controller dir */
-	struct dentry *ctl_rq_dequeued, *ctl_ob_enc_req, *ctl_ib_dec_req;
-	struct dentry *ctl_ob_enc_bytes, *ctl_ob_prot_bytes;
-	struct dentry *ctl_ib_dec_bytes, *ctl_ib_valid_bytes;
-	struct dentry *ctl_faultaddr, *ctl_faultdetail, *ctl_faultstatus;
-
 	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
 	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
 	struct dentry *ctl_kek, *ctl_tkek, *ctl_tdsk;
-	struct dentry *qi_congested;
-#endif
 #endif
 #endif
 };
 

+ 6 - 1
drivers/crypto/caam/jr.c

@@ -9,6 +9,7 @@
 #include <linux/of_address.h>
 
 #include "compat.h"
+#include "ctrl.h"
 #include "regs.h"
 #include "regs.h"
 #include "jr.h"
 #include "jr.h"
 #include "desc.h"
 #include "desc.h"
@@ -499,7 +500,11 @@ static int caam_jr_probe(struct platform_device *pdev)
 	jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl;
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring"))
+		if (caam_dpaa2)
+			error = dma_set_mask_and_coherent(jrdev,
+							  DMA_BIT_MASK(49));
+		else if (of_device_is_compatible(nprop,
+						 "fsl,sec-v5.0-job-ring"))
 			error = dma_set_mask_and_coherent(jrdev,
 							  DMA_BIT_MASK(40));
 		else

+ 12 - 18
drivers/crypto/caam/qi.c

@@ -24,9 +24,6 @@
  */
  */
 #define MAX_RSP_FQ_BACKLOG_PER_CPU	256
 #define MAX_RSP_FQ_BACKLOG_PER_CPU	256
 
 
-/* Length of a single buffer in the QI driver memory cache */
-#define CAAM_QI_MEMCACHE_SIZE	512
-
 #define CAAM_QI_ENQUEUE_RETRIES	10000
 #define CAAM_QI_ENQUEUE_RETRIES	10000
 
 
 #define CAAM_NAPI_WEIGHT	63
 #define CAAM_NAPI_WEIGHT	63
@@ -55,6 +52,7 @@ struct caam_qi_pcpu_priv {
 } ____cacheline_aligned;
 } ____cacheline_aligned;
 
 
 static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
 static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
+static DEFINE_PER_CPU(int, last_cpu);
 
 
 /*
 /*
  * caam_qi_priv - CAAM QI backend private params
  * caam_qi_priv - CAAM QI backend private params
@@ -203,8 +201,8 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
 		goto init_req_fq_fail;
 		goto init_req_fq_fail;
 	}
 	}
 
 
-	dev_info(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
-		 smp_processor_id());
+	dev_dbg(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
+		smp_processor_id());
 	return req_fq;
 	return req_fq;
 
 
 init_req_fq_fail:
 init_req_fq_fail:
@@ -277,6 +275,7 @@ empty_fq:
 		dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid);
 		dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid);
 
 
 	qman_destroy_fq(fq);
 	qman_destroy_fq(fq);
+	kfree(fq);
 
 
 	return ret;
 	return ret;
 }
 }
@@ -342,8 +341,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 		drv_ctx->req_fq = old_fq;
 
 
 		if (kill_fq(qidev, new_fq))
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 
 
 		return ret;
 		return ret;
 	}
 	}
@@ -373,10 +371,9 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 		drv_ctx->req_fq = old_fq;
 
 
 		if (kill_fq(qidev, new_fq))
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 	} else if (kill_fq(qidev, old_fq)) {
 	} else if (kill_fq(qidev, old_fq)) {
-		dev_warn(qidev, "Old CAAM FQ: %u kill failed\n", old_fq->fqid);
+		dev_warn(qidev, "Old CAAM FQ kill failed\n");
 	}
 	}
 
 
 	return 0;
 	return 0;
@@ -392,7 +389,6 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	dma_addr_t hwdesc;
 	dma_addr_t hwdesc;
 	struct caam_drv_ctx *drv_ctx;
 	struct caam_drv_ctx *drv_ctx;
 	const cpumask_t *cpus = qman_affine_cpus();
 	const cpumask_t *cpus = qman_affine_cpus();
-	static DEFINE_PER_CPU(int, last_cpu);
 
 
 	num_words = desc_len(sh_desc);
 	num_words = desc_len(sh_desc);
 	if (num_words > MAX_SDLEN) {
 	if (num_words > MAX_SDLEN) {
@@ -511,7 +507,6 @@ int caam_qi_shutdown(struct device *qidev)
 
 
 		if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
 		if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
 			dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
 			dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
-		kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
 	}
 	}
 
 
 	/*
 	/*
@@ -646,7 +641,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
 
 
 	per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq;
 	per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq;
 
 
-	dev_info(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
+	dev_dbg(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -679,7 +674,7 @@ static int init_cgr(struct device *qidev)
 		return ret;
 		return ret;
 	}
 	}
 
 
-	dev_info(qidev, "Congestion threshold set to %llu\n", val);
+	dev_dbg(qidev, "Congestion threshold set to %llu\n", val);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -737,6 +732,7 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	qi_pdev = platform_device_register_full(&qi_pdev_info);
 	qi_pdev = platform_device_register_full(&qi_pdev_info);
 	if (IS_ERR(qi_pdev))
 	if (IS_ERR(qi_pdev))
 		return PTR_ERR(qi_pdev);
 		return PTR_ERR(qi_pdev);
+	set_dma_ops(&qi_pdev->dev, get_dma_ops(ctrldev));
 
 
 	ctrlpriv = dev_get_drvdata(ctrldev);
 	ctrlpriv = dev_get_drvdata(ctrldev);
 	qidev = &qi_pdev->dev;
 	qidev = &qi_pdev->dev;
@@ -795,10 +791,8 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	/* Done with the CGRs; restore the cpus allowed mask */
 	/* Done with the CGRs; restore the cpus allowed mask */
 	set_cpus_allowed_ptr(current, &old_cpumask);
 	set_cpus_allowed_ptr(current, &old_cpumask);
 #ifdef CONFIG_DEBUG_FS
 #ifdef CONFIG_DEBUG_FS
-	ctrlpriv->qi_congested = debugfs_create_file("qi_congested", 0444,
-						     ctrlpriv->ctl,
-						     &times_congested,
-						     &caam_fops_u64_ro);
+	debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
+			    &times_congested, &caam_fops_u64_ro);
 #endif
 #endif
 	dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
 	dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
 	return 0;
 	return 0;

+ 3 - 0
drivers/crypto/caam/qi.h

@@ -39,6 +39,9 @@
  */
 #define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
 
+/* Length of a single buffer in the QI driver memory cache */
+#define CAAM_QI_MEMCACHE_SIZE	768
+
 extern bool caam_congested __read_mostly;
 
 /*

+ 1 - 0
drivers/crypto/caam/regs.h

@@ -293,6 +293,7 @@ struct caam_perfmon {
 	u32 cha_rev_ls;		/* CRNR - CHA Rev No. Least significant half*/
 #define CTPR_MS_QI_SHIFT	25
 #define CTPR_MS_QI_MASK		(0x1ull << CTPR_MS_QI_SHIFT)
+#define CTPR_MS_DPAA2		BIT(13)
 #define CTPR_MS_VIRT_EN_INCL	0x00000001
 #define CTPR_MS_VIRT_EN_POR	0x00000002
 #define CTPR_MS_PG_SZ_MASK	0x10

+ 81 - 0
drivers/crypto/caam/sg_sw_qm2.h

@@ -0,0 +1,81 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the names of the above-listed copyright holders nor the
+ *	 names of any contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SG_SW_QM2_H_
+#define _SG_SW_QM2_H_
+
+#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
+
+static inline void dma_to_qm_sg_one(struct dpaa2_sg_entry *qm_sg_ptr,
+				    dma_addr_t dma, u32 len, u16 offset)
+{
+	dpaa2_sg_set_addr(qm_sg_ptr, dma);
+	dpaa2_sg_set_format(qm_sg_ptr, dpaa2_sg_single);
+	dpaa2_sg_set_final(qm_sg_ptr, false);
+	dpaa2_sg_set_len(qm_sg_ptr, len);
+	dpaa2_sg_set_bpid(qm_sg_ptr, 0);
+	dpaa2_sg_set_offset(qm_sg_ptr, offset);
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * but does not have final bit; instead, returns last entry
+ */
+static inline struct dpaa2_sg_entry *
+sg_to_qm_sg(struct scatterlist *sg, int sg_count,
+	    struct dpaa2_sg_entry *qm_sg_ptr, u16 offset)
+{
+	while (sg_count && sg) {
+		dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg),
+				 sg_dma_len(sg), offset);
+		qm_sg_ptr++;
+		sg = sg_next(sg);
+		sg_count--;
+	}
+	return qm_sg_ptr - 1;
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * scatterlist must have been previously dma mapped
+ */
+static inline void sg_to_qm_sg_last(struct scatterlist *sg, int sg_count,
+				    struct dpaa2_sg_entry *qm_sg_ptr,
+				    u16 offset)
+{
+	qm_sg_ptr = sg_to_qm_sg(sg, sg_count, qm_sg_ptr, offset);
+	dpaa2_sg_set_final(qm_sg_ptr, true);
+}
+
+#endif /* _SG_SW_QM2_H_ */
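Editor's note: a usage sketch for the helpers in this new header, under stated assumptions (the example_ function is invented, "table" holds at least as many entries as the mapped scatterlist, and the caller unmaps the list again when the hardware is done):

static int example_build_dpaa2_sg(struct device *dev, struct scatterlist *sg,
				  int nents, struct dpaa2_sg_entry *table)
{
	int mapped = dma_map_sg(dev, sg, nents, DMA_TO_DEVICE);

	if (!mapped)
		return -ENOMEM;

	/* fill one entry per mapped segment, then set FINAL on the last one */
	sg_to_qm_sg_last(sg, mapped, table, 0);
	return 0;
}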

+ 25 - 18
drivers/crypto/caam/sg_sw_sec4.h

@@ -5,7 +5,13 @@
  *
  *
  */
  */
 
 
+#ifndef _SG_SW_SEC4_H_
+#define _SG_SW_SEC4_H_
+
+#include "ctrl.h"
 #include "regs.h"
 #include "regs.h"
+#include "sg_sw_qm2.h"
+#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
 
 
 struct sec4_sg_entry {
 struct sec4_sg_entry {
 	u64 ptr;
 	u64 ptr;
@@ -19,9 +25,15 @@ struct sec4_sg_entry {
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
 				      dma_addr_t dma, u32 len, u16 offset)
 				      dma_addr_t dma, u32 len, u16 offset)
 {
 {
-	sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
-	sec4_sg_ptr->len = cpu_to_caam32(len);
-	sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
+	if (caam_dpaa2) {
+		dma_to_qm_sg_one((struct dpaa2_sg_entry *)sec4_sg_ptr, dma, len,
+				 offset);
+	} else {
+		sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
+		sec4_sg_ptr->len = cpu_to_caam32(len);
+		sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset &
+							 SEC4_SG_OFFSET_MASK);
+	}
 #ifdef DEBUG
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
 	print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
 		       DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
@@ -47,6 +59,14 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count,
 	return sec4_sg_ptr - 1;
 	return sec4_sg_ptr - 1;
 }
 }
 
 
+static inline void sg_to_sec4_set_last(struct sec4_sg_entry *sec4_sg_ptr)
+{
+	if (caam_dpaa2)
+		dpaa2_sg_set_final((struct dpaa2_sg_entry *)sec4_sg_ptr, true);
+	else
+		sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+}
+
 /*
 /*
  * convert scatterlist to h/w link table format
  * convert scatterlist to h/w link table format
  * scatterlist must have been previously dma mapped
  * scatterlist must have been previously dma mapped
@@ -56,20 +76,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
 				      u16 offset)
 				      u16 offset)
 {
 {
 	sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
 	sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
-	sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+	sg_to_sec4_set_last(sec4_sg_ptr);
 }
 }
 
 
-static inline struct sec4_sg_entry *sg_to_sec4_sg_len(
-	struct scatterlist *sg, unsigned int total,
-	struct sec4_sg_entry *sec4_sg_ptr)
-{
-	do {
-		unsigned int len = min(sg_dma_len(sg), total);
-
-		dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), len, 0);
-		sec4_sg_ptr++;
-		sg = sg_next(sg);
-		total -= len;
-	} while (total);
-	return sec4_sg_ptr - 1;
-}
+#endif /* _SG_SW_SEC4_H_ */

+ 9 - 4
drivers/crypto/cavium/cpt/cptpf_main.c

@@ -268,8 +268,10 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 	mcode = &cpt->mcode[cpt->next_mc_idx];
 	mcode = &cpt->mcode[cpt->next_mc_idx];
 	memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ);
 	memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ);
 	mcode->code_size = ntohl(ucode->code_length) * 2;
 	mcode->code_size = ntohl(ucode->code_length) * 2;
-	if (!mcode->code_size)
-		return -EINVAL;
+	if (!mcode->code_size) {
+		ret = -EINVAL;
+		goto fw_release;
+	}
 
 
 	mcode->is_ae = is_ae;
 	mcode->is_ae = is_ae;
 	mcode->core_mask = 0ULL;
 	mcode->core_mask = 0ULL;
@@ -280,7 +282,8 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 					  &mcode->phys_base, GFP_KERNEL);
 	if (!mcode->code) {
 		dev_err(dev, "Unable to allocate space for microcode");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fw_release;
 	}
 
 	memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)),
@@ -302,12 +305,14 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 	ret = do_cpt_init(cpt, mcode);
 	if (ret) {
 		dev_err(dev, "do_cpt_init failed with ret: %d\n", ret);
-		return ret;
+		goto fw_release;
 	}
 
 	dev_info(dev, "Microcode Loaded %s\n", mcode->version);
 	mcode->is_mc_valid = 1;
 	cpt->next_mc_idx++;
+
+fw_release:
 	release_firmware(fw_entry);
 
 	return ret;

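Editorial note: both of the fixes above and the nitrox_probe() fix below follow the same kernel idiom: every failure after a resource has been acquired jumps to a single cleanup label so the resource is always released and a meaningful error code is returned. A minimal user-space sketch of that flow, with purely illustrative names and a fake "firmware" buffer standing in for the firmware API:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for a firmware image handed to the driver. */
struct fake_fw {
	unsigned char *data;
	size_t size;
};

static int load_microcode(const struct fake_fw *fw)
{
	unsigned char *code;
	int ret = 0;

	if (fw->size == 0) {
		/* Invalid image: still fall through to the release path. */
		ret = -1;
		goto out_release;
	}

	code = malloc(fw->size);
	if (!code) {
		ret = -2;
		goto out_release;
	}

	memcpy(code, fw->data, fw->size);
	printf("loaded %zu bytes of microcode\n", fw->size);
	free(code);

out_release:
	/* Single exit point: the firmware buffer would always be dropped
	 * here, mirroring the release_firmware() call added by the patch. */
	return ret;
}

int main(void)
{
	unsigned char blob[16] = { 0 };
	struct fake_fw fw = { blob, sizeof(blob) };

	return load_microcode(&fw) ? EXIT_FAILURE : EXIT_SUCCESS;
}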
+ 3 - 1
drivers/crypto/cavium/nitrox/nitrox_main.c

@@ -513,8 +513,10 @@ static int nitrox_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 
 	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
-	if (!ndev)
+	if (!ndev) {
+		err = -ENOMEM;
 		goto ndev_fail;
+	}
 
 	pci_set_drvdata(pdev, ndev);
 	ndev->pdev = pdev;

+ 15 - 7
drivers/crypto/ccp/Kconfig

@@ -1,25 +1,33 @@
 config CRYPTO_DEV_CCP_DD
-	tristate "Cryptographic Coprocessor device driver"
-	depends on CRYPTO_DEV_CCP
+	tristate "Secure Processor device driver"
 	default m
+	help
+	  Provides AMD Secure Processor device driver.
+	  If you choose 'M' here, this module will be called ccp.
+
+config CRYPTO_DEV_SP_CCP
+	bool "Cryptographic Coprocessor device"
+	default y
+	depends on CRYPTO_DEV_CCP_DD
 	select HW_RANDOM
 	select DMA_ENGINE
 	select DMADEVICES
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	help
-	  Provides the interface to use the AMD Cryptographic Coprocessor
-	  which can be used to offload encryption operations such as SHA,
-	  AES and more. If you choose 'M' here, this module will be called
-	  ccp.
+	  Provides the support for AMD Cryptographic Coprocessor (CCP) device
+	  which can be used to offload encryption operations such as SHA, AES
+	  and more.
 
 config CRYPTO_DEV_CCP_CRYPTO
 	tristate "Encryption and hashing offload support"
-	depends on CRYPTO_DEV_CCP_DD
 	default m
+	depends on CRYPTO_DEV_CCP_DD
+	depends on CRYPTO_DEV_SP_CCP
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AUTHENC
+	select CRYPTO_RSA
 	help
 	  Support for using the cryptographic API with the AMD Cryptographic
 	  Coprocessor. This module supports offload of SHA and AES algorithms.

+ 4 - 3
drivers/crypto/ccp/Makefile

@@ -1,12 +1,12 @@
 obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
-ccp-objs := ccp-dev.o \
+ccp-objs  := sp-dev.o sp-platform.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
 	    ccp-ops.o \
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
-	    ccp-platform.o \
 	    ccp-dmaengine.o \
 	    ccp-debugfs.o
-ccp-$(CONFIG_PCI) += ccp-pci.o
+ccp-$(CONFIG_PCI) += sp-pci.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
 ccp-crypto-objs := ccp-crypto-main.o \
@@ -15,4 +15,5 @@ ccp-crypto-objs := ccp-crypto-main.o \
 		   ccp-crypto-aes-xts.o \
 		   ccp-crypto-aes-galois.o \
 		   ccp-crypto-des3.o \
+		   ccp-crypto-rsa.o \
 		   ccp-crypto-sha.o

+ 1 - 1
drivers/crypto/ccp/ccp-crypto-aes-galois.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES GCM crypto API support
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *

+ 53 - 43
drivers/crypto/ccp/ccp-crypto-aes-xts.c

@@ -1,8 +1,9 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
+ * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -15,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <crypto/aes.h>
+#include <crypto/xts.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 
@@ -37,46 +39,26 @@ struct ccp_unit_size_map {
 	u32 value;
 };
 
-static struct ccp_unit_size_map unit_size_map[] = {
+static struct ccp_unit_size_map xts_unit_sizes[] = {
 	{
-		.size	= 4096,
-		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
-	},
-	{
-		.size	= 2048,
-		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
-	},
-	{
-		.size	= 1024,
-		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
+		.size   = 16,
+		.value	= CCP_XTS_AES_UNIT_SIZE_16,
 	},
 	{
-		.size	= 512,
+		.size   = 512,
 		.value	= CCP_XTS_AES_UNIT_SIZE_512,
 	},
 	{
-		.size	= 256,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 128,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 64,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 32,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+		.size   = 1024,
+		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
 	},
 	{
-		.size	= 16,
-		.value	= CCP_XTS_AES_UNIT_SIZE_16,
+		.size   = 2048,
+		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
 	},
 	{
-		.size	= 1,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+		.size   = 4096,
+		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
 	},
 };
 
@@ -96,15 +78,26 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
 static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 			      unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+	struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+	unsigned int ccpversion = ccp_version();
+	int ret;
 
-	/* Only support 128-bit AES key with a 128-bit Tweak key,
-	 * otherwise use the fallback
+	ret = xts_check_key(xfm, key, key_len);
+	if (ret)
+		return ret;
+
+	/* Version 3 devices support 128-bit keys; version 5 devices can
+	 * accommodate 128- and 256-bit keys.
 	 */
 	switch (key_len) {
 	case AES_KEYSIZE_128 * 2:
 		memcpy(ctx->u.aes.key, key, key_len);
 		break;
+	case AES_KEYSIZE_256 * 2:
+		if (ccpversion > CCP_VERSION(3, 0))
+			memcpy(ctx->u.aes.key, key, key_len);
+		break;
 	}
 	ctx->u.aes.key_len = key_len / 2;
 	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
@@ -117,6 +110,8 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
 	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	unsigned int ccpversion = ccp_version();
+	unsigned int fallback = 0;
 	unsigned int unit;
 	u32 unit_size;
 	int ret;
@@ -130,18 +125,32 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	if (!req->info)
 		return -EINVAL;
 
+	/* Check conditions under which the CCP can fulfill a request. The
+	 * device can handle input plaintext of a length that is a multiple
+	 * of the unit_size, but the crypto implementation only supports
+	 * the unit_size being equal to the input length. This limits the
+	 * number of scenarios we can handle.
+	 */
 	unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
-	if (req->nbytes <= unit_size_map[0].size) {
-		for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) {
-			if (!(req->nbytes & (unit_size_map[unit].size - 1))) {
-				unit_size = unit_size_map[unit].value;
-				break;
-			}
+	for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) {
+		if (req->nbytes == xts_unit_sizes[unit].size) {
+			unit_size = unit;
+			break;
 		}
 	}
-
-	if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
-	    (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
+	/* The CCP has restrictions on block sizes. Also, a version 3 device
+	 * only supports AES-128 operations; version 5 CCPs support both
+	 * AES-128 and -256 operations.
+	 */
+	if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST)
+		fallback = 1;
+	if ((ccpversion < CCP_VERSION(5, 0)) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_128))
+		fallback = 1;
+	if ((ctx->u.aes.key_len != AES_KEYSIZE_128) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_256))
+		fallback = 1;
+	if (fallback) {
 		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);
 
 		/* Use the fallback to process the request for any
@@ -164,6 +173,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
 	INIT_LIST_HEAD(&rctx->cmd.entry);
 	rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+	rctx->cmd.u.xts.type = CCP_AES_TYPE_128;
 	rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
 					   : CCP_AES_ACTION_DECRYPT;
 	rctx->cmd.u.xts.unit_size = unit_size;

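Editorial note: the ccp-crypto-aes-xts.c changes above replace the old power-of-two check with an exact-match lookup against the supported XTS unit sizes, and route anything the CCP cannot handle (unsupported length, or AES-256 on a version 3 device) to the software fallback. A small standalone sketch of that decision follows; the enum values and the plain integer "device version" are illustrative stand-ins for the driver's tables and ccp_version().

#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-ins for the driver's unit-size table. */
enum xts_unit { UNIT_16, UNIT_512, UNIT_1024, UNIT_2048, UNIT_4096, UNIT_LAST };

struct unit_size_map { size_t size; enum xts_unit value; };

static const struct unit_size_map xts_unit_sizes[] = {
	{ 16, UNIT_16 }, { 512, UNIT_512 }, { 1024, UNIT_1024 },
	{ 2048, UNIT_2048 }, { 4096, UNIT_4096 },
};

/* Returns 1 if the request must go to the software fallback. */
static int needs_fallback(size_t nbytes, size_t key_len, int device_version)
{
	enum xts_unit unit = UNIT_LAST;
	size_t i;

	for (i = 0; i < sizeof(xts_unit_sizes) / sizeof(xts_unit_sizes[0]); i++) {
		if (nbytes == xts_unit_sizes[i].size) {
			unit = xts_unit_sizes[i].value;
			break;
		}
	}

	if (unit == UNIT_LAST)
		return 1;		/* length is not an exact unit size */
	if (device_version < 5 && key_len != 16)
		return 1;		/* v3 devices: AES-128 only */
	if (key_len != 16 && key_len != 32)
		return 1;		/* neither AES-128 nor AES-256 */
	return 0;
}

int main(void)
{
	printf("512B, AES-256, v3 device: fallback=%d\n", needs_fallback(512, 32, 3));
	printf("4096B, AES-256, v5 device: fallback=%d\n", needs_fallback(4096, 32, 5));
	return 0;
}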
+ 1 - 1
drivers/crypto/ccp/ccp-crypto-des3.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <ghook@amd.com>
  *

+ 20 - 1
drivers/crypto/ccp/ccp-crypto-main.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
@@ -17,6 +17,7 @@
 #include <linux/ccp.h>
 #include <linux/scatterlist.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/akcipher.h>
 
 #include "ccp-crypto.h"
 
@@ -37,10 +38,15 @@ static unsigned int des3_disable;
 module_param(des3_disable, uint, 0444);
 MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value");
 
+static unsigned int rsa_disable;
+module_param(rsa_disable, uint, 0444);
+MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value");
+
 /* List heads for the supported algorithms */
 static LIST_HEAD(hash_algs);
 static LIST_HEAD(cipher_algs);
 static LIST_HEAD(aead_algs);
+static LIST_HEAD(akcipher_algs);
 
 /* For any tfm, requests for that tfm must be returned on the order
  * received.  With multiple queues available, the CCP can process more
@@ -358,6 +364,12 @@ static int ccp_register_algs(void)
 			return ret;
 	}
 
+	if (!rsa_disable) {
+		ret = ccp_register_rsa_algs(&akcipher_algs);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -366,6 +378,7 @@ static void ccp_unregister_algs(void)
 	struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
 	struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
 	struct ccp_crypto_aead *aead_alg, *aead_tmp;
+	struct ccp_crypto_akcipher_alg *akc_alg, *akc_tmp;
 
 	list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) {
 		crypto_unregister_ahash(&ahash_alg->alg);
@@ -384,6 +397,12 @@ static void ccp_unregister_algs(void)
 		list_del(&aead_alg->entry);
 		kfree(aead_alg);
 	}
+
+	list_for_each_entry_safe(akc_alg, akc_tmp, &akcipher_algs, entry) {
+		crypto_unregister_akcipher(&akc_alg->alg);
+		list_del(&akc_alg->entry);
+		kfree(akc_alg);
+	}
 }
 
 static int ccp_crypto_init(void)

+ 299 - 0
drivers/crypto/ccp/ccp-crypto-rsa.c

@@ -0,0 +1,299 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) RSA crypto API support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static inline struct akcipher_request *akcipher_request_cast(
+	struct crypto_async_request *req)
+{
+	return container_of(req, struct akcipher_request, base);
+}
+
+static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen,
+					    const u8 *buf, size_t sz)
+{
+	int nskip;
+
+	for (nskip = 0; nskip < sz; nskip++)
+		if (buf[nskip])
+			break;
+	*kplen = sz - nskip;
+	*kpbuf = kzalloc(*kplen, GFP_KERNEL);
+	if (!*kpbuf)
+		return -ENOMEM;
+	memcpy(*kpbuf, buf + nskip, *kplen);
+
+	return 0;
+}
+
+static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct akcipher_request *req = akcipher_request_cast(async_req);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+
+	if (ret)
+		return ret;
+
+	req->dst_len = rctx->cmd.u.rsa.key_size >> 3;
+
+	return 0;
+}
+
+static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
+{
+	if (ccp_version() > CCP_VERSION(3, 0))
+		return CCP5_RSA_MAXMOD;
+	else
+		return CCP_RSA_MAXMOD;
+}
+
+static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+	int ret = 0;
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_RSA;
+
+	rctx->cmd.u.rsa.key_size = ctx->u.rsa.key_len; /* in bits */
+	if (encrypt) {
+		rctx->cmd.u.rsa.exp = &ctx->u.rsa.e_sg;
+		rctx->cmd.u.rsa.exp_len = ctx->u.rsa.e_len;
+	} else {
+		rctx->cmd.u.rsa.exp = &ctx->u.rsa.d_sg;
+		rctx->cmd.u.rsa.exp_len = ctx->u.rsa.d_len;
+	}
+	rctx->cmd.u.rsa.mod = &ctx->u.rsa.n_sg;
+	rctx->cmd.u.rsa.mod_len = ctx->u.rsa.n_len;
+	rctx->cmd.u.rsa.src = req->src;
+	rctx->cmd.u.rsa.src_len = req->src_len;
+	rctx->cmd.u.rsa.dst = req->dst;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_rsa_encrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, true);
+}
+
+static int ccp_rsa_decrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, false);
+}
+
+static int ccp_check_key_length(unsigned int len)
+{
+	/* In bits */
+	if (len < 8 || len > 4096)
+		return -EINVAL;
+	return 0;
+}
+
+static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx)
+{
+	/* Clean up old key data */
+	kzfree(ctx->u.rsa.e_buf);
+	ctx->u.rsa.e_buf = NULL;
+	ctx->u.rsa.e_len = 0;
+	kzfree(ctx->u.rsa.n_buf);
+	ctx->u.rsa.n_buf = NULL;
+	ctx->u.rsa.n_len = 0;
+	kzfree(ctx->u.rsa.d_buf);
+	ctx->u.rsa.d_buf = NULL;
+	ctx->u.rsa.d_len = 0;
+}
+
+static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
+			  unsigned int keylen, bool private)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct rsa_key raw_key;
+	int ret;
+
+	ccp_rsa_free_key_bufs(ctx);
+	memset(&raw_key, 0, sizeof(raw_key));
+
+	/* Code borrowed from crypto/rsa.c */
+	if (private)
+		ret = rsa_parse_priv_key(&raw_key, key, keylen);
+	else
+		ret = rsa_parse_pub_key(&raw_key, key, keylen);
+	if (ret)
+		goto n_key;
+
+	ret = ccp_copy_and_save_keypart(&ctx->u.rsa.n_buf, &ctx->u.rsa.n_len,
+					raw_key.n, raw_key.n_sz);
+	if (ret)
+		goto key_err;
+	sg_init_one(&ctx->u.rsa.n_sg, ctx->u.rsa.n_buf, ctx->u.rsa.n_len);
+
+	ctx->u.rsa.key_len = ctx->u.rsa.n_len << 3; /* convert to bits */
+	if (ccp_check_key_length(ctx->u.rsa.key_len)) {
+		ret = -EINVAL;
+		goto key_err;
+	}
+
+	ret = ccp_copy_and_save_keypart(&ctx->u.rsa.e_buf, &ctx->u.rsa.e_len,
+					raw_key.e, raw_key.e_sz);
+	if (ret)
+		goto key_err;
+	sg_init_one(&ctx->u.rsa.e_sg, ctx->u.rsa.e_buf, ctx->u.rsa.e_len);
+
+	if (private) {
+		ret = ccp_copy_and_save_keypart(&ctx->u.rsa.d_buf,
+						&ctx->u.rsa.d_len,
+						raw_key.d, raw_key.d_sz);
+		if (ret)
+			goto key_err;
+		sg_init_one(&ctx->u.rsa.d_sg,
+			    ctx->u.rsa.d_buf, ctx->u.rsa.d_len);
+	}
+
+	return 0;
+
+key_err:
+	ccp_rsa_free_key_bufs(ctx);
+
+n_key:
+	return ret;
+}
+
+static int ccp_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
+			      unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, true);
+}
+
+static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
+			     unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, false);
+}
+
+static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx));
+	ctx->complete = ccp_rsa_complete;
+
+	return 0;
+}
+
+static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+	ccp_rsa_free_key_bufs(ctx);
+}
+
+static struct akcipher_alg ccp_rsa_defaults = {
+	.encrypt = ccp_rsa_encrypt,
+	.decrypt = ccp_rsa_decrypt,
+	.sign = ccp_rsa_decrypt,
+	.verify = ccp_rsa_encrypt,
+	.set_pub_key = ccp_rsa_setpubkey,
+	.set_priv_key = ccp_rsa_setprivkey,
+	.max_size = ccp_rsa_maxsize,
+	.init = ccp_rsa_init_tfm,
+	.exit = ccp_rsa_exit_tfm,
+	.base = {
+		.cra_name = "rsa",
+		.cra_driver_name = "rsa-ccp",
+		.cra_priority = CCP_CRA_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = 2 * sizeof(struct ccp_ctx),
+	},
+};
+
+struct ccp_rsa_def {
+	unsigned int version;
+	const char *name;
+	const char *driver_name;
+	unsigned int reqsize;
+	struct akcipher_alg *alg_defaults;
+};
+
+static struct ccp_rsa_def rsa_algs[] = {
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "rsa",
+		.driver_name	= "rsa-ccp",
+		.reqsize	= sizeof(struct ccp_rsa_req_ctx),
+		.alg_defaults	= &ccp_rsa_defaults,
+	}
+};
+
+int ccp_register_rsa_alg(struct list_head *head, const struct ccp_rsa_def *def)
+{
+	struct ccp_crypto_akcipher_alg *ccp_alg;
+	struct akcipher_alg *alg;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	alg = &ccp_alg->alg;
+	*alg = *def->alg_defaults;
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+	ret = crypto_register_akcipher(alg);
+	if (ret) {
+		pr_err("%s akcipher algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return 0;
+}
+
+int ccp_register_rsa_algs(struct list_head *head)
+{
+	int i, ret;
+	unsigned int ccpversion = ccp_version();
+
+	/* Register the RSA algorithm in standard mode
+	 * This works for CCP v3 and later
+	 */
+	for (i = 0; i < ARRAY_SIZE(rsa_algs); i++) {
+		if (rsa_algs[i].version > ccpversion)
+			continue;
+		ret = ccp_register_rsa_alg(head, &rsa_algs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}

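Editorial note: the new ccp-crypto-rsa.c above parses an RSA key, strips the leading zero bytes from each component before handing it to the hardware (ccp_copy_and_save_keypart), and rejects moduli outside the supported bit range (ccp_check_key_length). A standalone sketch of those two helpers is shown below; the buffer handling and the 8..4096-bit bounds mirror the patch, but the function names are illustrative, not the driver's own.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Copy a big-endian integer, dropping leading zero bytes, the way the
 * driver prepares an RSA key component for the CCP. */
static int copy_keypart(unsigned char **out, size_t *out_len,
			const unsigned char *buf, size_t len)
{
	size_t skip = 0;

	while (skip < len && buf[skip] == 0)
		skip++;

	*out_len = len - skip;
	*out = calloc(*out_len ? *out_len : 1, 1);
	if (!*out)
		return -1;
	memcpy(*out, buf + skip, *out_len);
	return 0;
}

/* Key size check in bits, mirroring ccp_check_key_length(). */
static int check_key_bits(size_t bits)
{
	return (bits < 8 || bits > 4096) ? -1 : 0;
}

int main(void)
{
	const unsigned char n[] = { 0x00, 0x00, 0xc3, 0x5a, 0x11 };
	unsigned char *stripped;
	size_t len;

	if (copy_keypart(&stripped, &len, n, sizeof(n)))
		return EXIT_FAILURE;

	printf("modulus: %zu bytes (%zu bits), valid=%d\n",
	       len, len * 8, check_key_bits(len * 8) == 0);
	free(stripped);
	return 0;
}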
+ 1 - 1
drivers/crypto/ccp/ccp-crypto-sha.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) SHA crypto API support
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>

+ 34 - 2
drivers/crypto/ccp/ccp-crypto.h

@@ -1,7 +1,7 @@
 /*
 /*
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  *
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
  *
@@ -24,6 +24,8 @@
 #include <crypto/ctr.h>
 #include <crypto/ctr.h>
 #include <crypto/hash.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/sha.h>
+#include <crypto/akcipher.h>
+#include <crypto/internal/rsa.h>
 
 
 #define	CCP_LOG_LEVEL	KERN_INFO
 #define	CCP_LOG_LEVEL	KERN_INFO
 
 
@@ -58,6 +60,12 @@ struct ccp_crypto_ahash_alg {
 	struct ahash_alg alg;
 	struct ahash_alg alg;
 };
 };
 
 
+struct ccp_crypto_akcipher_alg {
+	struct list_head entry;
+
+	struct akcipher_alg alg;
+};
+
 static inline struct ccp_crypto_ablkcipher_alg *
 static inline struct ccp_crypto_ablkcipher_alg *
 	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
 	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
 {
 {
@@ -91,7 +99,7 @@ struct ccp_aes_ctx {
 
 
 	struct scatterlist key_sg;
 	struct scatterlist key_sg;
 	unsigned int key_len;
 	unsigned int key_len;
-	u8 key[AES_MAX_KEY_SIZE];
+	u8 key[AES_MAX_KEY_SIZE * 2];
 
 
 	u8 nonce[CTR_RFC3686_NONCE_SIZE];
 	u8 nonce[CTR_RFC3686_NONCE_SIZE];
 
 
@@ -227,12 +235,35 @@ struct ccp_sha_exp_ctx {
 	u8 buf[MAX_SHA_BLOCK_SIZE];
 	u8 buf[MAX_SHA_BLOCK_SIZE];
 };
 };
 
 
+/***** RSA related defines *****/
+
+struct ccp_rsa_ctx {
+	unsigned int key_len; /* in bits */
+	struct scatterlist e_sg;
+	u8 *e_buf;
+	unsigned int e_len;
+	struct scatterlist n_sg;
+	u8 *n_buf;
+	unsigned int n_len;
+	struct scatterlist d_sg;
+	u8 *d_buf;
+	unsigned int d_len;
+};
+
+struct ccp_rsa_req_ctx {
+	struct ccp_cmd cmd;
+};
+
+#define	CCP_RSA_MAXMOD	(4 * 1024 / 8)
+#define	CCP5_RSA_MAXMOD	(16 * 1024 / 8)
+
 /***** Common Context Structure *****/
 /***** Common Context Structure *****/
 struct ccp_ctx {
 struct ccp_ctx {
 	int (*complete)(struct crypto_async_request *req, int ret);
 	int (*complete)(struct crypto_async_request *req, int ret);
 
 
 	union {
 	union {
 		struct ccp_aes_ctx aes;
 		struct ccp_aes_ctx aes;
+		struct ccp_rsa_ctx rsa;
 		struct ccp_sha_ctx sha;
 		struct ccp_sha_ctx sha;
 		struct ccp_des3_ctx des3;
 		struct ccp_des3_ctx des3;
 	} u;
 	} u;
@@ -249,5 +280,6 @@ int ccp_register_aes_xts_algs(struct list_head *head);
 int ccp_register_aes_aeads(struct list_head *head);
 int ccp_register_aes_aeads(struct list_head *head);
 int ccp_register_sha_algs(struct list_head *head);
 int ccp_register_sha_algs(struct list_head *head);
 int ccp_register_des3_algs(struct list_head *head);
 int ccp_register_des3_algs(struct list_head *head);
+int ccp_register_rsa_algs(struct list_head *head);
 
 
 #endif
 #endif

+ 10 - 5
drivers/crypto/ccp/ccp-debugfs.c

@@ -305,19 +305,19 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 
 
 	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
 	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
 	if (!ccp->debugfs_instance)
 	if (!ccp->debugfs_instance)
-		return;
+		goto err;
 
 
 	debugfs_info = debugfs_create_file("info", 0400,
 	debugfs_info = debugfs_create_file("info", 0400,
 					   ccp->debugfs_instance, ccp,
 					   ccp->debugfs_instance, ccp,
 					   &ccp_debugfs_info_ops);
 					   &ccp_debugfs_info_ops);
 	if (!debugfs_info)
 	if (!debugfs_info)
-		return;
+		goto err;
 
 
 	debugfs_stats = debugfs_create_file("stats", 0600,
 	debugfs_stats = debugfs_create_file("stats", 0600,
 					    ccp->debugfs_instance, ccp,
 					    ccp->debugfs_instance, ccp,
 					    &ccp_debugfs_stats_ops);
 					    &ccp_debugfs_stats_ops);
 	if (!debugfs_stats)
 	if (!debugfs_stats)
-		return;
+		goto err;
 
 
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		cmd_q = &ccp->cmd_q[i];
 		cmd_q = &ccp->cmd_q[i];
@@ -327,15 +327,20 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 		debugfs_q_instance =
 		debugfs_q_instance =
 			debugfs_create_dir(name, ccp->debugfs_instance);
 			debugfs_create_dir(name, ccp->debugfs_instance);
 		if (!debugfs_q_instance)
 		if (!debugfs_q_instance)
-			return;
+			goto err;
 
 
 		debugfs_q_stats =
 		debugfs_q_stats =
 			debugfs_create_file("stats", 0600,
 			debugfs_create_file("stats", 0600,
 					    debugfs_q_instance, cmd_q,
 					    debugfs_q_instance, cmd_q,
 					    &ccp_debugfs_queue_ops);
 					    &ccp_debugfs_queue_ops);
 		if (!debugfs_q_stats)
 		if (!debugfs_q_stats)
-			return;
+			goto err;
 	}
 	}
+
+	return;
+
+err:
+	debugfs_remove_recursive(ccp->debugfs_instance);
 }
 }
 
 
 void ccp5_debugfs_destroy(void)
 void ccp5_debugfs_destroy(void)

+ 13 - 7
drivers/crypto/ccp/ccp-dev-v3.c

@@ -1,7 +1,7 @@
 /*
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  * AMD Cryptographic Coprocessor (CCP) driver
  *
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -359,8 +359,7 @@ static void ccp_irq_bh(unsigned long data)
 
 
 static irqreturn_t ccp_irq_handler(int irq, void *data)
 static irqreturn_t ccp_irq_handler(int irq, void *data)
 {
 {
-	struct device *dev = data;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_device *ccp = (struct ccp_device *)data;
 
 
 	ccp_disable_queue_interrupts(ccp);
 	ccp_disable_queue_interrupts(ccp);
 	if (ccp->use_tasklet)
 	if (ccp->use_tasklet)
@@ -454,7 +453,7 @@ static int ccp_init(struct ccp_device *ccp)
 	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
 	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
 
 
 	/* Request an irq */
 	/* Request an irq */
-	ret = ccp->get_irq(ccp);
+	ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp);
 	if (ret) {
 	if (ret) {
 		dev_err(dev, "unable to allocate an IRQ\n");
 		dev_err(dev, "unable to allocate an IRQ\n");
 		goto e_pool;
 		goto e_pool;
@@ -511,7 +510,7 @@ e_kthread:
 		if (ccp->cmd_q[i].kthread)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 			kthread_stop(ccp->cmd_q[i].kthread);
 
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 
 e_pool:
 e_pool:
 	for (i = 0; i < ccp->cmd_q_count; i++)
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -550,7 +549,7 @@ static void ccp_destroy(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 			kthread_stop(ccp->cmd_q[i].kthread);
 
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 
 	for (i = 0; i < ccp->cmd_q_count; i++)
 	for (i = 0; i < ccp->cmd_q_count; i++)
 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
@@ -586,10 +585,17 @@ static const struct ccp_actions ccp3_actions = {
 	.irqhandler = ccp_irq_handler,
 	.irqhandler = ccp_irq_handler,
 };
 };
 
 
+const struct ccp_vdata ccpv3_platform = {
+	.version = CCP_VERSION(3, 0),
+	.setup = NULL,
+	.perform = &ccp3_actions,
+	.offset = 0,
+};
+
 const struct ccp_vdata ccpv3 = {
 const struct ccp_vdata ccpv3 = {
 	.version = CCP_VERSION(3, 0),
 	.version = CCP_VERSION(3, 0),
 	.setup = NULL,
 	.setup = NULL,
 	.perform = &ccp3_actions,
 	.perform = &ccp3_actions,
-	.bar = 2,
 	.offset = 0x20000,
 	.offset = 0x20000,
+	.rsamax = CCP_RSA_MAX_WIDTH,
 };
 };

+ 14 - 14
drivers/crypto/ccp/ccp-dev-v5.c

@@ -1,7 +1,7 @@
 /*
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  * AMD Cryptographic Coprocessor (CCP) driver
  *
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
  *
  *
@@ -145,6 +145,7 @@ union ccp_function {
 #define	CCP_AES_MODE(p)		((p)->aes.mode)
 #define	CCP_AES_MODE(p)		((p)->aes.mode)
 #define	CCP_AES_TYPE(p)		((p)->aes.type)
 #define	CCP_AES_TYPE(p)		((p)->aes.type)
 #define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
 #define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
+#define	CCP_XTS_TYPE(p)		((p)->aes_xts.type)
 #define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
 #define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
 #define	CCP_DES3_SIZE(p)	((p)->des3.size)
 #define	CCP_DES3_SIZE(p)	((p)->des3.size)
 #define	CCP_DES3_ENCRYPT(p)	((p)->des3.encrypt)
 #define	CCP_DES3_ENCRYPT(p)	((p)->des3.encrypt)
@@ -344,6 +345,7 @@ static int ccp5_perform_xts_aes(struct ccp_op *op)
 	CCP5_CMD_PROT(&desc) = 0;
 	CCP5_CMD_PROT(&desc) = 0;
 
 
 	function.raw = 0;
 	function.raw = 0;
+	CCP_XTS_TYPE(&function) = op->u.xts.type;
 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
@@ -469,7 +471,7 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	CCP5_CMD_PROT(&desc) = 0;
 	CCP5_CMD_PROT(&desc) = 0;
 
 
 	function.raw = 0;
 	function.raw = 0;
-	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
+	CCP_RSA_SIZE(&function) = (op->u.rsa.mod_size + 7) >> 3;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
 
 
 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
@@ -484,10 +486,10 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
 
-	/* Exponent is in LSB memory */
-	CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
-	CCP5_CMD_KEY_HI(&desc) = 0;
-	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	/* Key (Exponent) is in external memory */
+	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
+	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
 
 	return ccp5_do_cmd(&desc, op->cmd_q);
 	return ccp5_do_cmd(&desc, op->cmd_q);
 }
 }
@@ -769,8 +771,7 @@ static void ccp5_irq_bh(unsigned long data)
 
 
 static irqreturn_t ccp5_irq_handler(int irq, void *data)
 static irqreturn_t ccp5_irq_handler(int irq, void *data)
 {
 {
-	struct device *dev = data;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_device *ccp = (struct ccp_device *)data;
 
 
 	ccp5_disable_queue_interrupts(ccp);
 	ccp5_disable_queue_interrupts(ccp);
 	ccp->total_interrupts++;
 	ccp->total_interrupts++;
@@ -881,7 +882,7 @@ static int ccp5_init(struct ccp_device *ccp)
 
 
 	dev_dbg(dev, "Requesting an IRQ...\n");
 	dev_dbg(dev, "Requesting an IRQ...\n");
 	/* Request an irq */
 	/* Request an irq */
-	ret = ccp->get_irq(ccp);
+	ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp);
 	if (ret) {
 	if (ret) {
 		dev_err(dev, "unable to allocate an IRQ\n");
 		dev_err(dev, "unable to allocate an IRQ\n");
 		goto e_pool;
 		goto e_pool;
@@ -987,7 +988,7 @@ e_kthread:
 			kthread_stop(ccp->cmd_q[i].kthread);
 			kthread_stop(ccp->cmd_q[i].kthread);
 
 
 e_irq:
 e_irq:
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 
 e_pool:
 e_pool:
 	for (i = 0; i < ccp->cmd_q_count; i++)
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -1037,7 +1038,7 @@ static void ccp5_destroy(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 			kthread_stop(ccp->cmd_q[i].kthread);
 
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		cmd_q = &ccp->cmd_q[i];
 		cmd_q = &ccp->cmd_q[i];
@@ -1106,15 +1107,14 @@ static const struct ccp_actions ccp5_actions = {
 	.init = ccp5_init,
 	.init = ccp5_init,
 	.destroy = ccp5_destroy,
 	.destroy = ccp5_destroy,
 	.get_free_slots = ccp5_get_free_slots,
 	.get_free_slots = ccp5_get_free_slots,
-	.irqhandler = ccp5_irq_handler,
 };
 };
 
 
 const struct ccp_vdata ccpv5a = {
 const struct ccp_vdata ccpv5a = {
 	.version = CCP_VERSION(5, 0),
 	.version = CCP_VERSION(5, 0),
 	.setup = ccp5_config,
 	.setup = ccp5_config,
 	.perform = &ccp5_actions,
 	.perform = &ccp5_actions,
-	.bar = 2,
 	.offset = 0x0,
 	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
 };
 };
 
 
 const struct ccp_vdata ccpv5b = {
 const struct ccp_vdata ccpv5b = {
@@ -1122,6 +1122,6 @@ const struct ccp_vdata ccpv5b = {
 	.dma_chan_attr = DMA_PRIVATE,
 	.dma_chan_attr = DMA_PRIVATE,
 	.setup = ccp5other_config,
 	.setup = ccp5other_config,
 	.perform = &ccp5_actions,
 	.perform = &ccp5_actions,
-	.bar = 2,
 	.offset = 0x0,
 	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
 };
 };

+ 84 - 50
drivers/crypto/ccp/ccp-dev.c

@@ -1,7 +1,7 @@
 /*
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  * AMD Cryptographic Coprocessor (CCP) driver
  *
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -11,7 +11,6 @@
  * published by the Free Software Foundation.
  * published by the Free Software Foundation.
  */
  */
 
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
 #include <linux/sched.h>
@@ -30,12 +29,6 @@
 
 
 #include "ccp-dev.h"
 #include "ccp-dev.h"
 
 
-MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
-MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.1.0");
-MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
-
 struct ccp_tasklet_data {
 struct ccp_tasklet_data {
 	struct completion completion;
 	struct completion completion;
 	struct ccp_cmd *cmd;
 	struct ccp_cmd *cmd;
@@ -111,13 +104,6 @@ static LIST_HEAD(ccp_units);
 static DEFINE_SPINLOCK(ccp_rr_lock);
 static DEFINE_SPINLOCK(ccp_rr_lock);
 static struct ccp_device *ccp_rr;
 static struct ccp_device *ccp_rr;
 
 
-/* Ever-increasing value to produce unique unit numbers */
-static atomic_t ccp_unit_ordinal;
-static unsigned int ccp_increment_unit_ordinal(void)
-{
-	return atomic_inc_return(&ccp_unit_ordinal);
-}
-
 /**
 /**
  * ccp_add_device - add a CCP device to the list
  * ccp_add_device - add a CCP device to the list
  *
  *
@@ -415,6 +401,7 @@ static void ccp_do_cmd_complete(unsigned long data)
 	struct ccp_cmd *cmd = tdata->cmd;
 	struct ccp_cmd *cmd = tdata->cmd;
 
 
 	cmd->callback(cmd->data, cmd->ret);
 	cmd->callback(cmd->data, cmd->ret);
+
 	complete(&tdata->completion);
 	complete(&tdata->completion);
 }
 }
 
 
@@ -464,14 +451,17 @@ int ccp_cmd_queue_thread(void *data)
  *
  *
  * @dev: device struct of the CCP
  * @dev: device struct of the CCP
  */
  */
-struct ccp_device *ccp_alloc_struct(struct device *dev)
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp)
 {
 {
+	struct device *dev = sp->dev;
 	struct ccp_device *ccp;
 	struct ccp_device *ccp;
 
 
 	ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
 	ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
 	if (!ccp)
 	if (!ccp)
 		return NULL;
 		return NULL;
 	ccp->dev = dev;
 	ccp->dev = dev;
+	ccp->sp = sp;
+	ccp->axcache = sp->axcache;
 
 
 	INIT_LIST_HEAD(&ccp->cmd);
 	INIT_LIST_HEAD(&ccp->cmd);
 	INIT_LIST_HEAD(&ccp->backlog);
 	INIT_LIST_HEAD(&ccp->backlog);
@@ -486,9 +476,8 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 	init_waitqueue_head(&ccp->sb_queue);
 	init_waitqueue_head(&ccp->sb_queue);
 	init_waitqueue_head(&ccp->suspend_queue);
 	init_waitqueue_head(&ccp->suspend_queue);
 
 
-	ccp->ord = ccp_increment_unit_ordinal();
-	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
-	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord);
+	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", sp->ord);
+	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", sp->ord);
 
 
 	return ccp;
 	return ccp;
 }
 }
@@ -538,55 +527,100 @@ bool ccp_queues_suspended(struct ccp_device *ccp)
 
 
 	return ccp->cmd_q_count == suspended;
 	return ccp->cmd_q_count == suspended;
 }
 }
-#endif
 
 
-static int __init ccp_mod_init(void)
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
 {
 {
-#ifdef CONFIG_X86
-	int ret;
+	struct ccp_device *ccp = sp->ccp_data;
+	unsigned long flags;
+	unsigned int i;
 
 
-	ret = ccp_pci_init();
-	if (ret)
-		return ret;
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
 
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		ccp_pci_exit();
-		return -ENODEV;
+	ccp->suspending = 1;
+
+	/* Wake all the queue kthreads to prepare for suspend */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+int ccp_dev_resume(struct sp_device *sp)
+{
+	struct ccp_device *ccp = sp->ccp_data;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 0;
+
+	/* Wake up all the kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].suspended = 0;
+		wake_up_process(ccp->cmd_q[i].kthread);
 	}
 	}
 
 
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
 	return 0;
 	return 0;
+}
 #endif
 #endif
 
 
-#ifdef CONFIG_ARM64
+int ccp_dev_init(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct ccp_device *ccp;
 	int ret;
 	int ret;
 
 
-	ret = ccp_platform_init();
+	ret = -ENOMEM;
+	ccp = ccp_alloc_struct(sp);
+	if (!ccp)
+		goto e_err;
+	sp->ccp_data = ccp;
+
+	ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata;
+	if (!ccp->vdata || !ccp->vdata->version) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ccp->use_tasklet = sp->use_tasklet;
+
+	ccp->io_regs = sp->io_map + ccp->vdata->offset;
+	if (ccp->vdata->setup)
+		ccp->vdata->setup(ccp);
+
+	ret = ccp->vdata->perform->init(ccp);
 	if (ret)
 	if (ret)
-		return ret;
+		goto e_err;
 
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		ccp_platform_exit();
-		return -ENODEV;
-	}
+	dev_notice(dev, "ccp enabled\n");
 
 
 	return 0;
 	return 0;
-#endif
 
 
-	return -ENODEV;
+e_err:
+	sp->ccp_data = NULL;
+
+	dev_notice(dev, "ccp initialization failed\n");
+
+	return ret;
 }
 }
 
 
-static void __exit ccp_mod_exit(void)
+void ccp_dev_destroy(struct sp_device *sp)
 {
 {
-#ifdef CONFIG_X86
-	ccp_pci_exit();
-#endif
+	struct ccp_device *ccp = sp->ccp_data;
 
 
-#ifdef CONFIG_ARM64
-	ccp_platform_exit();
-#endif
-}
+	if (!ccp)
+		return;
 
 
-module_init(ccp_mod_init);
-module_exit(ccp_mod_exit);
+	ccp->vdata->perform->destroy(ccp);
+}

+ 9 - 21
drivers/crypto/ccp/ccp-dev.h

@@ -1,7 +1,7 @@
 /*
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  * AMD Cryptographic Coprocessor (CCP) driver
  *
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -27,6 +27,8 @@
 #include <linux/irqreturn.h>
 #include <linux/irqreturn.h>
 #include <linux/dmaengine.h>
 #include <linux/dmaengine.h>
 
 
+#include "sp-dev.h"
+
 #define MAX_CCP_NAME_LEN		16
 #define MAX_CCP_NAME_LEN		16
 #define MAX_DMAPOOL_NAME_LEN		32
 #define MAX_DMAPOOL_NAME_LEN		32
 
 
@@ -192,6 +194,7 @@
 #define CCP_AES_CTX_SB_COUNT		1
 #define CCP_AES_CTX_SB_COUNT		1
 
 
 #define CCP_XTS_AES_KEY_SB_COUNT	1
 #define CCP_XTS_AES_KEY_SB_COUNT	1
+#define CCP5_XTS_AES_KEY_SB_COUNT	2
 #define CCP_XTS_AES_CTX_SB_COUNT	1
 #define CCP_XTS_AES_CTX_SB_COUNT	1
 
 
 #define CCP_DES3_KEY_SB_COUNT		1
 #define CCP_DES3_KEY_SB_COUNT		1
@@ -200,6 +203,7 @@
 #define CCP_SHA_SB_COUNT		1
 #define CCP_SHA_SB_COUNT		1
 
 
 #define CCP_RSA_MAX_WIDTH		4096
 #define CCP_RSA_MAX_WIDTH		4096
+#define CCP5_RSA_MAX_WIDTH		16384
 
 
 #define CCP_PASSTHRU_BLOCKSIZE		256
 #define CCP_PASSTHRU_BLOCKSIZE		256
 #define CCP_PASSTHRU_MASKSIZE		32
 #define CCP_PASSTHRU_MASKSIZE		32
@@ -344,12 +348,11 @@ struct ccp_device {
 	char rngname[MAX_CCP_NAME_LEN];
 	char rngname[MAX_CCP_NAME_LEN];
 
 
 	struct device *dev;
 	struct device *dev;
+	struct sp_device *sp;
 
 
 	/* Bus specific device information
 	/* Bus specific device information
 	 */
 	 */
 	void *dev_specific;
 	void *dev_specific;
-	int (*get_irq)(struct ccp_device *ccp);
-	void (*free_irq)(struct ccp_device *ccp);
 	unsigned int qim;
 	unsigned int qim;
 	unsigned int irq;
 	unsigned int irq;
 	bool use_tasklet;
 	bool use_tasklet;
@@ -362,7 +365,6 @@ struct ccp_device {
 	 *   them.
 	 *   them.
 	 */
 	 */
 	struct mutex req_mutex ____cacheline_aligned;
 	struct mutex req_mutex ____cacheline_aligned;
-	void __iomem *io_map;
 	void __iomem *io_regs;
 	void __iomem *io_regs;
 
 
 	/* Master lists that all cmds are queued on. Because there can be
 	/* Master lists that all cmds are queued on. Because there can be
@@ -497,6 +499,7 @@ struct ccp_aes_op {
 };
 };
 
 
 struct ccp_xts_aes_op {
 struct ccp_xts_aes_op {
+	enum ccp_aes_type type;
 	enum ccp_aes_action action;
 	enum ccp_aes_action action;
 	enum ccp_xts_aes_unit_size unit_size;
 	enum ccp_xts_aes_unit_size unit_size;
 };
 };
@@ -626,18 +629,12 @@ struct ccp5_desc {
 	struct dword7 dw7;
 	struct dword7 dw7;
 };
 };
 
 
-int ccp_pci_init(void);
-void ccp_pci_exit(void);
-
-int ccp_platform_init(void);
-void ccp_platform_exit(void);
-
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 
 
 extern void ccp_log_error(struct ccp_device *, int);
 extern void ccp_log_error(struct ccp_device *, int);
 
 
-struct ccp_device *ccp_alloc_struct(struct device *dev);
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 int ccp_cmd_queue_thread(void *data);
 int ccp_cmd_queue_thread(void *data);
 int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
 int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
@@ -669,16 +666,7 @@ struct ccp_actions {
 	irqreturn_t (*irqhandler)(int, void *);
 	irqreturn_t (*irqhandler)(int, void *);
 };
 };
 
 
-/* Structure to hold CCP version-specific values */
-struct ccp_vdata {
-	const unsigned int version;
-	const unsigned int dma_chan_attr;
-	void (*setup)(struct ccp_device *);
-	const struct ccp_actions *perform;
-	const unsigned int bar;
-	const unsigned int offset;
-};
-
+extern const struct ccp_vdata ccpv3_platform;
 extern const struct ccp_vdata ccpv3;
 extern const struct ccp_vdata ccpv3;
 extern const struct ccp_vdata ccpv5a;
 extern const struct ccp_vdata ccpv5a;
 extern const struct ccp_vdata ccpv5b;
 extern const struct ccp_vdata ccpv5b;

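Editorial note: the ccp-dev.h changes above move struct ccp_vdata out to sp-dev.h and extend the per-version data with an rsamax field, so each device generation advertises its own RSA limit (4096 bits for v3, 16384 for v5) alongside its register offset and action table. A minimal sketch of that per-generation data-table pattern follows; the struct and values below are simplified stand-ins, not the real ccp_vdata definitions.

#include <stdio.h>

/* Simplified stand-in for the driver's per-version data. */
struct vdata_model {
	unsigned int version;	/* device generation, e.g. 3 or 5 */
	unsigned int offset;	/* MMIO offset of the CCP registers */
	unsigned int rsamax;	/* largest supported RSA modulus, in bits */
};

static const struct vdata_model ccpv3_model = { 3, 0x20000, 4096 };
static const struct vdata_model ccpv5_model = { 5, 0x0, 16384 };

/* A request is rejected up front if it exceeds the device's limit,
 * as ccp_run_rsa_cmd() now does via vdata->rsamax. */
static int rsa_size_ok(const struct vdata_model *vdata, unsigned int key_bits)
{
	return key_bits <= vdata->rsamax;
}

int main(void)
{
	printf("8192-bit RSA on v3: %d\n", rsa_size_ok(&ccpv3_model, 8192));
	printf("8192-bit RSA on v5: %d\n", rsa_size_ok(&ccpv5_model, 8192));
	return 0;
}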
+ 1 - 1
drivers/crypto/ccp/ccp-dmaengine.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *

+ 91 - 42
drivers/crypto/ccp/ccp-ops.c

@@ -1,7 +1,7 @@
 /*
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  * AMD Cryptographic Coprocessor (CCP) driver
  *
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -168,7 +168,7 @@ static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
 
 
 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
 						 dir);
 						 dir);
-		if (!wa->dma.address)
+		if (dma_mapping_error(wa->dev, wa->dma.address))
 			return -ENOMEM;
 			return -ENOMEM;
 
 
 		wa->dma.length = len;
 		wa->dma.length = len;
@@ -1038,6 +1038,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	struct ccp_op op;
 	struct ccp_op op;
 	unsigned int unit_size, dm_offset;
 	unsigned int unit_size, dm_offset;
 	bool in_place = false;
 	bool in_place = false;
+	unsigned int sb_count;
+	enum ccp_aes_type aestype;
 	int ret;
 	int ret;
 
 
 	switch (xts->unit_size) {
 	switch (xts->unit_size) {
@@ -1061,7 +1063,11 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	if (xts->key_len != AES_KEYSIZE_128)
+	if (xts->key_len == AES_KEYSIZE_128)
+		aestype = CCP_AES_TYPE_128;
+	else if (xts->key_len == AES_KEYSIZE_256)
+		aestype = CCP_AES_TYPE_256;
+	else
 		return -EINVAL;
 		return -EINVAL;
 
 
 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
@@ -1083,23 +1089,44 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	op.sb_key = cmd_q->sb_key;
 	op.sb_key = cmd_q->sb_key;
 	op.sb_ctx = cmd_q->sb_ctx;
 	op.sb_ctx = cmd_q->sb_ctx;
 	op.init = 1;
 	op.init = 1;
+	op.u.xts.type = aestype;
 	op.u.xts.action = xts->action;
 	op.u.xts.action = xts->action;
 	op.u.xts.unit_size = xts->unit_size;
 	op.u.xts.unit_size = xts->unit_size;
 
 
-	/* All supported key sizes fit in a single (32-byte) SB entry
-	 * and must be in little endian format. Use the 256-bit byte
-	 * swap passthru option to convert from big endian to little
-	 * endian.
+	/* A version 3 device only supports 128-bit keys, which fits into a
+	 * single SB entry. A version 5 device uses a 512-bit vector, so two
+	 * SB entries.
 	 */
 	 */
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
+	else
+		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
 	ret = ccp_init_dm_workarea(&key, cmd_q,
 	ret = ccp_init_dm_workarea(&key, cmd_q,
-				   CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
+				   sb_count * CCP_SB_BYTES,
 				   DMA_TO_DEVICE);
 				   DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
-	dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
-	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
-	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
+		/* All supported key sizes must be in little endian format.
+		 * Use the 256-bit byte swap passthru option to convert from
+		 * big endian to little endian.
+		 */
+		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
+		ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
+		ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
+	} else {
+		/* Version 5 CCPs use a 512-bit space for the key: each portion
+		 * occupies 256 bits, or one entire slot, and is zero-padded.
+		 */
+		unsigned int pad;
+
+		dm_offset = CCP_SB_BYTES;
+		pad = dm_offset - xts->key_len;
+		ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
+		ccp_set_dm_area(&key, dm_offset + pad, xts->key, xts->key_len,
+				xts->key_len);
+	}
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
@@ -1731,42 +1758,53 @@ e_ctx:
 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 {
 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
-	struct ccp_dm_workarea exp, src;
-	struct ccp_data dst;
+	struct ccp_dm_workarea exp, src, dst;
 	struct ccp_op op;
 	struct ccp_op op;
 	unsigned int sb_count, i_len, o_len;
 	unsigned int sb_count, i_len, o_len;
 	int ret;
 	int ret;
 
 
-	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
+	/* Check against the maximum allowable size, in bits */
+	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
 		return -EINVAL;
 		return -EINVAL;
 
 
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
 	/* The RSA modulus must precede the message being acted upon, so
 	/* The RSA modulus must precede the message being acted upon, so
 	 * it must be copied to a DMA area where the message and the
 	 * it must be copied to a DMA area where the message and the
 	 * modulus can be concatenated.  Therefore the input buffer
 	 * modulus can be concatenated.  Therefore the input buffer
 	 * length required is twice the output buffer length (which
 	 * length required is twice the output buffer length (which
-	 * must be a multiple of 256-bits).
+	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
+	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
+	 * required.
 	 */
 	 */
-	o_len = ((rsa->key_size + 255) / 256) * 32;
+	o_len = 32 * ((rsa->key_size + 255) / 256);
 	i_len = o_len * 2;
 	i_len = o_len * 2;
 
 
-	sb_count = o_len / CCP_SB_BYTES;
-
-	memset(&op, 0, sizeof(op));
-	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
-
-	if (!op.sb_key)
-		return -EIO;
+	sb_count = 0;
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* sb_count is the number of storage block slots required
+		 * for the modulus.
+		 */
+		sb_count = o_len / CCP_SB_BYTES;
+		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
+								sb_count);
+		if (!op.sb_key)
+			return -EIO;
+	} else {
+		/* A version 5 device allows a modulus size that will not fit
+		 * in the LSB, so the command will transfer it from memory.
+		 * Set the sb key to the default, even though it's not used.
+		 */
+		op.sb_key = cmd_q->sb_key;
+	}
 
 
-	/* The RSA exponent may span multiple (32-byte) SB entries and must
-	 * be in little endian format. Reverse copy each 32-byte chunk
-	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
-	 * and each byte within that chunk and do not perform any byte swap
-	 * operations on the passthru operation.
+	/* The RSA exponent must be in little endian format. Reverse its
+	 * byte order.
 	 */
 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
 	if (ret)
@@ -1775,11 +1813,22 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
 	if (ret)
 		goto e_exp;
-	ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
-			     CCP_PASSTHRU_BYTESWAP_NOOP);
-	if (ret) {
-		cmd->engine_error = cmd_q->cmd_error;
-		goto e_exp;
+
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* Copy the exponent to the local storage block, using
+		 * as many 32-byte blocks as were allocated above. It's
+		 * already little endian, so no further change is required.
+		 */
+		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_exp;
+		}
+	} else {
+		/* The exponent can be retrieved from memory via DMA. */
+		op.exp.u.dma.address = exp.dma.address;
+		op.exp.u.dma.offset = 0;
 	}
 
 	/* Concatenate the modulus and the message. Both the modulus and
@@ -1798,8 +1847,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_src;
 
 	/* Prepare the output area for the operation */
-	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
-			    o_len, DMA_FROM_DEVICE);
+	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
 	if (ret)
 		goto e_src;
 
@@ -1807,7 +1855,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	op.src.u.dma.address = src.dma.address;
 	op.src.u.dma.offset = 0;
 	op.src.u.dma.length = i_len;
-	op.dst.u.dma.address = dst.dm_wa.dma.address;
+	op.dst.u.dma.address = dst.dma.address;
 	op.dst.u.dma.offset = 0;
 	op.dst.u.dma.length = o_len;
 
@@ -1820,10 +1868,10 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_dst;
 	}
 
-	ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len);
+	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
 
 e_dst:
-	ccp_free_data(&dst, cmd_q);
+	ccp_dm_free(&dst);
 
 e_src:
 	ccp_dm_free(&src);
@@ -1832,7 +1880,8 @@ e_exp:
 	ccp_dm_free(&exp);
 
 e_sb:
-	cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
+	if (sb_count)
+		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
 	return ret;
 }
@@ -1992,7 +2041,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 		/* Load the mask */
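The hunks above only state the byte-order requirement for the exponent workarea. Purely as an illustration of what "reverse its byte order" means here (this is not the driver's ccp_reverse_set_dm_area() implementation; the helper name is made up), a big-endian operand can be mirrored into a little-endian buffer like this:

	/* Illustration only: mirror a big-endian operand into a
	 * little-endian buffer. Hypothetical helper, not ccp code.
	 */
	static void reverse_copy(unsigned char *dst, const unsigned char *src,
				 unsigned int len)
	{
		unsigned int i;

		for (i = 0; i < len; i++)
			dst[i] = src[len - 1 - i];
	}

	/* e.g. a 4-byte exponent 0x00010001 stored big endian as
	 * { 0x00, 0x01, 0x00, 0x01 } becomes { 0x01, 0x00, 0x01, 0x00 }.
	 */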

+ 0 - 356
drivers/crypto/ccp/ccp-pci.c

@@ -1,356 +0,0 @@
-/*
- * AMD Cryptographic Coprocessor (CCP) driver
- *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
- *
- * Author: Tom Lendacky <thomas.lendacky@amd.com>
- * Author: Gary R Hook <gary.hook@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-#include <linux/dma-mapping.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/ccp.h>
-
-#include "ccp-dev.h"
-
-#define MSIX_VECTORS			2
-
-struct ccp_msix {
-	u32 vector;
-	char name[16];
-};
-
-struct ccp_pci {
-	int msix_count;
-	struct ccp_msix msix[MSIX_VECTORS];
-};
-
-static int ccp_get_msix_irqs(struct ccp_device *ccp)
-{
-	struct ccp_pci *ccp_pci = ccp->dev_specific;
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct msix_entry msix_entry[MSIX_VECTORS];
-	unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
-	int v, ret;
-
-	for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
-		msix_entry[v].entry = v;
-
-	ret = pci_enable_msix_range(pdev, msix_entry, 1, v);
-	if (ret < 0)
-		return ret;
-
-	ccp_pci->msix_count = ret;
-	for (v = 0; v < ccp_pci->msix_count; v++) {
-		/* Set the interrupt names and request the irqs */
-		snprintf(ccp_pci->msix[v].name, name_len, "%s-%u",
-			 ccp->name, v);
-		ccp_pci->msix[v].vector = msix_entry[v].vector;
-		ret = request_irq(ccp_pci->msix[v].vector,
-				  ccp->vdata->perform->irqhandler,
-				  0, ccp_pci->msix[v].name, dev);
-		if (ret) {
-			dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
-				   ret);
-			goto e_irq;
-		}
-	}
-	ccp->use_tasklet = true;
-
-	return 0;
-
-e_irq:
-	while (v--)
-		free_irq(ccp_pci->msix[v].vector, dev);
-
-	pci_disable_msix(pdev);
-
-	ccp_pci->msix_count = 0;
-
-	return ret;
-}
-
-static int ccp_get_msi_irq(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int ret;
-
-	ret = pci_enable_msi(pdev);
-	if (ret)
-		return ret;
-
-	ccp->irq = pdev->irq;
-	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, dev);
-	if (ret) {
-		dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
-		goto e_msi;
-	}
-	ccp->use_tasklet = true;
-
-	return 0;
-
-e_msi:
-	pci_disable_msi(pdev);
-
-	return ret;
-}
-
-static int ccp_get_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	int ret;
-
-	ret = ccp_get_msix_irqs(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get MSI-X vectors, try MSI */
-	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
-	ret = ccp_get_msi_irq(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get MSI interrupt */
-	dev_notice(dev, "could not enable MSI (%d)\n", ret);
-
-	return ret;
-}
-
-static void ccp_free_irqs(struct ccp_device *ccp)
-{
-	struct ccp_pci *ccp_pci = ccp->dev_specific;
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-
-	if (ccp_pci->msix_count) {
-		while (ccp_pci->msix_count--)
-			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
-				 dev);
-		pci_disable_msix(pdev);
-	} else if (ccp->irq) {
-		free_irq(ccp->irq, dev);
-		pci_disable_msi(pdev);
-	}
-	ccp->irq = 0;
-}
-
-static int ccp_find_mmio_area(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	resource_size_t io_len;
-	unsigned long io_flags;
-
-	io_flags = pci_resource_flags(pdev, ccp->vdata->bar);
-	io_len = pci_resource_len(pdev, ccp->vdata->bar);
-	if ((io_flags & IORESOURCE_MEM) &&
-	    (io_len >= (ccp->vdata->offset + 0x800)))
-		return ccp->vdata->bar;
-
-	return -EIO;
-}
-
-static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-	struct ccp_device *ccp;
-	struct ccp_pci *ccp_pci;
-	struct device *dev = &pdev->dev;
-	unsigned int bar;
-	int ret;
-
-	ret = -ENOMEM;
-	ccp = ccp_alloc_struct(dev);
-	if (!ccp)
-		goto e_err;
-
-	ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
-	if (!ccp_pci)
-		goto e_err;
-
-	ccp->dev_specific = ccp_pci;
-	ccp->vdata = (struct ccp_vdata *)id->driver_data;
-	if (!ccp->vdata || !ccp->vdata->version) {
-		ret = -ENODEV;
-		dev_err(dev, "missing driver data\n");
-		goto e_err;
-	}
-	ccp->get_irq = ccp_get_irqs;
-	ccp->free_irq = ccp_free_irqs;
-
-	ret = pci_request_regions(pdev, "ccp");
-	if (ret) {
-		dev_err(dev, "pci_request_regions failed (%d)\n", ret);
-		goto e_err;
-	}
-
-	ret = pci_enable_device(pdev);
-	if (ret) {
-		dev_err(dev, "pci_enable_device failed (%d)\n", ret);
-		goto e_regions;
-	}
-
-	pci_set_master(pdev);
-
-	ret = ccp_find_mmio_area(ccp);
-	if (ret < 0)
-		goto e_device;
-	bar = ret;
-
-	ret = -EIO;
-	ccp->io_map = pci_iomap(pdev, bar, 0);
-	if (!ccp->io_map) {
-		dev_err(dev, "pci_iomap failed\n");
-		goto e_device;
-	}
-	ccp->io_regs = ccp->io_map + ccp->vdata->offset;
-
-	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
-	if (ret) {
-		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-		if (ret) {
-			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
-				ret);
-			goto e_iomap;
-		}
-	}
-
-	dev_set_drvdata(dev, ccp);
-
-	if (ccp->vdata->setup)
-		ccp->vdata->setup(ccp);
-
-	ret = ccp->vdata->perform->init(ccp);
-	if (ret)
-		goto e_iomap;
-
-	dev_notice(dev, "enabled\n");
-
-	return 0;
-
-e_iomap:
-	pci_iounmap(pdev, ccp->io_map);
-
-e_device:
-	pci_disable_device(pdev);
-
-e_regions:
-	pci_release_regions(pdev);
-
-e_err:
-	dev_notice(dev, "initialization failed\n");
-	return ret;
-}
-
-static void ccp_pci_remove(struct pci_dev *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-
-	if (!ccp)
-		return;
-
-	ccp->vdata->perform->destroy(ccp);
-
-	pci_iounmap(pdev, ccp->io_map);
-
-	pci_disable_device(pdev);
-
-	pci_release_regions(pdev);
-
-	dev_notice(dev, "disabled\n");
-}
-
-#ifdef CONFIG_PM
-static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 1;
-
-	/* Wake all the queue kthreads to prepare for suspend */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		wake_up_process(ccp->cmd_q[i].kthread);
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	/* Wait for all queue kthreads to say they're done */
-	while (!ccp_queues_suspended(ccp))
-		wait_event_interruptible(ccp->suspend_queue,
-					 ccp_queues_suspended(ccp));
-
-	return 0;
-}
-
-static int ccp_pci_resume(struct pci_dev *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 0;
-
-	/* Wake up all the kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		ccp->cmd_q[i].suspended = 0;
-		wake_up_process(ccp->cmd_q[i].kthread);
-	}
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	return 0;
-}
-#endif
-
-static const struct pci_device_id ccp_pci_table[] = {
-	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
-	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a },
-	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b },
-	/* Last entry must be zero */
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, ccp_pci_table);
-
-static struct pci_driver ccp_pci_driver = {
-	.name = "ccp",
-	.id_table = ccp_pci_table,
-	.probe = ccp_pci_probe,
-	.remove = ccp_pci_remove,
-#ifdef CONFIG_PM
-	.suspend = ccp_pci_suspend,
-	.resume = ccp_pci_resume,
-#endif
-};
-
-int ccp_pci_init(void)
-{
-	return pci_register_driver(&ccp_pci_driver);
-}
-
-void ccp_pci_exit(void)
-{
-	pci_unregister_driver(&ccp_pci_driver);
-}
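Both the removed PCI glue above and its sp-pci.c replacement further down negotiate the DMA mask the same way: try the 48-bit mask the device supports, then fall back to 32 bits. A minimal sketch of that pattern (the helper name is invented; the driver open-codes this in its probe routines):

	#include <linux/device.h>
	#include <linux/dma-mapping.h>

	/* Hypothetical helper illustrating the 48-bit/32-bit DMA fallback. */
	static int example_set_dma_mask(struct device *dev)
	{
		int ret;

		/* Prefer the full 48-bit addressing the CCP supports. */
		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
		if (!ret)
			return 0;

		/* Otherwise settle for 32-bit DMA. */
		return dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
	}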

+ 0 - 293
drivers/crypto/ccp/ccp-platform.c

@@ -1,293 +0,0 @@
-/*
- * AMD Cryptographic Coprocessor (CCP) driver
- *
- * Copyright (C) 2014,2016 Advanced Micro Devices, Inc.
- *
- * Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/ioport.h>
-#include <linux/dma-mapping.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/ccp.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/acpi.h>
-
-#include "ccp-dev.h"
-
-struct ccp_platform {
-	int coherent;
-};
-
-static const struct acpi_device_id ccp_acpi_match[];
-static const struct of_device_id ccp_of_match[];
-
-static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev)
-{
-#ifdef CONFIG_OF
-	const struct of_device_id *match;
-
-	match = of_match_node(ccp_of_match, pdev->dev.of_node);
-	if (match && match->data)
-		return (struct ccp_vdata *)match->data;
-#endif
-	return NULL;
-}
-
-static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
-{
-#ifdef CONFIG_ACPI
-	const struct acpi_device_id *match;
-
-	match = acpi_match_device(ccp_acpi_match, &pdev->dev);
-	if (match && match->driver_data)
-		return (struct ccp_vdata *)match->driver_data;
-#endif
-	return NULL;
-}
-
-static int ccp_get_irq(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct platform_device *pdev = to_platform_device(dev);
-	int ret;
-
-	ret = platform_get_irq(pdev, 0);
-	if (ret < 0)
-		return ret;
-
-	ccp->irq = ret;
-	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, dev);
-	if (ret) {
-		dev_notice(dev, "unable to allocate IRQ (%d)\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int ccp_get_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	int ret;
-
-	ret = ccp_get_irq(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get an interrupt */
-	dev_notice(dev, "could not enable interrupts (%d)\n", ret);
-
-	return ret;
-}
-
-static void ccp_free_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-
-	free_irq(ccp->irq, dev);
-}
-
-static struct resource *ccp_find_mmio_area(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct resource *ior;
-
-	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (ior && (resource_size(ior) >= 0x800))
-		return ior;
-
-	return NULL;
-}
-
-static int ccp_platform_probe(struct platform_device *pdev)
-{
-	struct ccp_device *ccp;
-	struct ccp_platform *ccp_platform;
-	struct device *dev = &pdev->dev;
-	enum dev_dma_attr attr;
-	struct resource *ior;
-	int ret;
-
-	ret = -ENOMEM;
-	ccp = ccp_alloc_struct(dev);
-	if (!ccp)
-		goto e_err;
-
-	ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
-	if (!ccp_platform)
-		goto e_err;
-
-	ccp->dev_specific = ccp_platform;
-	ccp->vdata = pdev->dev.of_node ? ccp_get_of_version(pdev)
-					 : ccp_get_acpi_version(pdev);
-	if (!ccp->vdata || !ccp->vdata->version) {
-		ret = -ENODEV;
-		dev_err(dev, "missing driver data\n");
-		goto e_err;
-	}
-	ccp->get_irq = ccp_get_irqs;
-	ccp->free_irq = ccp_free_irqs;
-
-	ior = ccp_find_mmio_area(ccp);
-	ccp->io_map = devm_ioremap_resource(dev, ior);
-	if (IS_ERR(ccp->io_map)) {
-		ret = PTR_ERR(ccp->io_map);
-		goto e_err;
-	}
-	ccp->io_regs = ccp->io_map;
-
-	attr = device_get_dma_attr(dev);
-	if (attr == DEV_DMA_NOT_SUPPORTED) {
-		dev_err(dev, "DMA is not supported");
-		goto e_err;
-	}
-
-	ccp_platform->coherent = (attr == DEV_DMA_COHERENT);
-	if (ccp_platform->coherent)
-		ccp->axcache = CACHE_WB_NO_ALLOC;
-	else
-		ccp->axcache = CACHE_NONE;
-
-	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
-	if (ret) {
-		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
-		goto e_err;
-	}
-
-	dev_set_drvdata(dev, ccp);
-
-	ret = ccp->vdata->perform->init(ccp);
-	if (ret)
-		goto e_err;
-
-	dev_notice(dev, "enabled\n");
-
-	return 0;
-
-e_err:
-	dev_notice(dev, "initialization failed\n");
-	return ret;
-}
-
-static int ccp_platform_remove(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-
-	ccp->vdata->perform->destroy(ccp);
-
-	dev_notice(dev, "disabled\n");
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int ccp_platform_suspend(struct platform_device *pdev,
-				pm_message_t state)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 1;
-
-	/* Wake all the queue kthreads to prepare for suspend */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		wake_up_process(ccp->cmd_q[i].kthread);
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	/* Wait for all queue kthreads to say they're done */
-	while (!ccp_queues_suspended(ccp))
-		wait_event_interruptible(ccp->suspend_queue,
-					 ccp_queues_suspended(ccp));
-
-	return 0;
-}
-
-static int ccp_platform_resume(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 0;
-
-	/* Wake up all the kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		ccp->cmd_q[i].suspended = 0;
-		wake_up_process(ccp->cmd_q[i].kthread);
-	}
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id ccp_acpi_match[] = {
-	{ "AMDI0C00", (kernel_ulong_t)&ccpv3 },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id ccp_of_match[] = {
-	{ .compatible = "amd,ccp-seattle-v1a",
-	  .data = (const void *)&ccpv3 },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, ccp_of_match);
-#endif
-
-static struct platform_driver ccp_platform_driver = {
-	.driver = {
-		.name = "ccp",
-#ifdef CONFIG_ACPI
-		.acpi_match_table = ccp_acpi_match,
-#endif
-#ifdef CONFIG_OF
-		.of_match_table = ccp_of_match,
-#endif
-	},
-	.probe = ccp_platform_probe,
-	.remove = ccp_platform_remove,
-#ifdef CONFIG_PM
-	.suspend = ccp_platform_suspend,
-	.resume = ccp_platform_resume,
-#endif
-};
-
-int ccp_platform_init(void)
-{
-	return platform_driver_register(&ccp_platform_driver);
-}
-
-void ccp_platform_exit(void)
-{
-	platform_driver_unregister(&ccp_platform_driver);
-}

+ 277 - 0
drivers/crypto/ccp/sp-dev.c

@@ -0,0 +1,277 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+#include "sp-dev.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.1.0");
+MODULE_DESCRIPTION("AMD Secure Processor driver");
+
+/* List of SPs, SP count, read-write access lock, and access functions
+ *
+ * Lock structure: get sp_unit_lock for reading whenever we need to
+ * examine the SP list.
+ */
+static DEFINE_RWLOCK(sp_unit_lock);
+static LIST_HEAD(sp_units);
+
+/* Ever-increasing value to produce unique unit numbers */
+static atomic_t sp_ordinal;
+
+static void sp_add_device(struct sp_device *sp)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&sp_unit_lock, flags);
+
+	list_add_tail(&sp->entry, &sp_units);
+
+	write_unlock_irqrestore(&sp_unit_lock, flags);
+}
+
+static void sp_del_device(struct sp_device *sp)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&sp_unit_lock, flags);
+
+	list_del(&sp->entry);
+
+	write_unlock_irqrestore(&sp_unit_lock, flags);
+}
+
+static irqreturn_t sp_irq_handler(int irq, void *data)
+{
+	struct sp_device *sp = data;
+
+	if (sp->ccp_irq_handler)
+		sp->ccp_irq_handler(irq, sp->ccp_irq_data);
+
+	if (sp->psp_irq_handler)
+		sp->psp_irq_handler(irq, sp->psp_irq_data);
+
+	return IRQ_HANDLED;
+}
+
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	int ret;
+
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) {
+		/* Need a common routine to manage all interrupts */
+		sp->ccp_irq_data = data;
+		sp->ccp_irq_handler = handler;
+
+		if (!sp->irq_registered) {
+			ret = request_irq(sp->ccp_irq, sp_irq_handler, 0,
+					  sp->name, sp);
+			if (ret)
+				return ret;
+
+			sp->irq_registered = true;
+		}
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		ret = request_irq(sp->ccp_irq, handler, 0, name, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	int ret;
+
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) {
+		/* Need a common routine to manage all interrupts */
+		sp->psp_irq_data = data;
+		sp->psp_irq_handler = handler;
+
+		if (!sp->irq_registered) {
+			ret = request_irq(sp->psp_irq, sp_irq_handler, 0,
+					  sp->name, sp);
+			if (ret)
+				return ret;
+
+			sp->irq_registered = true;
+		}
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		ret = request_irq(sp->psp_irq, handler, 0, name, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void sp_free_ccp_irq(struct sp_device *sp, void *data)
+{
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) {
+		/* Using common routine to manage all interrupts */
+		if (!sp->psp_irq_handler) {
+			/* Nothing else using it, so free it */
+			free_irq(sp->ccp_irq, sp);
+
+			sp->irq_registered = false;
+		}
+
+		sp->ccp_irq_handler = NULL;
+		sp->ccp_irq_data = NULL;
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		free_irq(sp->ccp_irq, data);
+	}
+}
+
+void sp_free_psp_irq(struct sp_device *sp, void *data)
+{
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) {
+		/* Using common routine to manage all interrupts */
+		if (!sp->ccp_irq_handler) {
+			/* Nothing else using it, so free it */
+			free_irq(sp->psp_irq, sp);
+
+			sp->irq_registered = false;
+		}
+
+		sp->psp_irq_handler = NULL;
+		sp->psp_irq_data = NULL;
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		free_irq(sp->psp_irq, data);
+	}
+}
+
+/**
+ * sp_alloc_struct - allocate and initialize the sp_device struct
+ *
+ * @dev: device struct of the SP
+ */
+struct sp_device *sp_alloc_struct(struct device *dev)
+{
+	struct sp_device *sp;
+
+	sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL);
+	if (!sp)
+		return NULL;
+
+	sp->dev = dev;
+	sp->ord = atomic_inc_return(&sp_ordinal);
+	snprintf(sp->name, SP_MAX_NAME_LEN, "sp-%u", sp->ord);
+
+	return sp;
+}
+
+int sp_init(struct sp_device *sp)
+{
+	sp_add_device(sp);
+
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_init(sp);
+
+	return 0;
+}
+
+void sp_destroy(struct sp_device *sp)
+{
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_destroy(sp);
+
+	sp_del_device(sp);
+}
+
+#ifdef CONFIG_PM
+int sp_suspend(struct sp_device *sp, pm_message_t state)
+{
+	int ret;
+
+	if (sp->dev_vdata->ccp_vdata) {
+		ret = ccp_dev_suspend(sp, state);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int sp_resume(struct sp_device *sp)
+{
+	int ret;
+
+	if (sp->dev_vdata->ccp_vdata) {
+		ret = ccp_dev_resume(sp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static int __init sp_mod_init(void)
+{
+#ifdef CONFIG_X86
+	int ret;
+
+	ret = sp_pci_init();
+	if (ret)
+		return ret;
+
+	return 0;
+#endif
+
+#ifdef CONFIG_ARM64
+	int ret;
+
+	ret = sp_platform_init();
+	if (ret)
+		return ret;
+
+	return 0;
+#endif
+
+	return -ENODEV;
+}
+
+static void __exit sp_mod_exit(void)
+{
+#ifdef CONFIG_X86
+	sp_pci_exit();
+#endif
+
+#ifdef CONFIG_ARM64
+	sp_platform_exit();
+#endif
+}
+
+module_init(sp_mod_init);
+module_exit(sp_mod_exit);
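sp-dev.c above only adds writers for sp_unit_lock; the reader side (for example sp_get_master(), which is declared in sp-dev.h but implemented outside this excerpt) is expected to take the lock for reading, as the comment at the top of the file prescribes. A sketch of such a reader, with the "return the first unit" policy assumed purely for illustration:

	/* Illustrative reader; assumes it lives in sp-dev.c next to
	 * sp_unit_lock and sp_units. Not code from this series.
	 */
	static struct sp_device *sp_get_first_unit(void)
	{
		struct sp_device *sp = NULL;
		unsigned long flags;

		read_lock_irqsave(&sp_unit_lock, flags);

		if (!list_empty(&sp_units))
			sp = list_first_entry(&sp_units, struct sp_device, entry);

		read_unlock_irqrestore(&sp_unit_lock, flags);

		return sp;
	}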

+ 133 - 0
drivers/crypto/ccp/sp-dev.h

@@ -0,0 +1,133 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __SP_DEV_H__
+#define __SP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+
+#define SP_MAX_NAME_LEN		32
+
+#define CACHE_NONE			0x00
+#define CACHE_WB_NO_ALLOC		0xb7
+
+/* Structure to hold CCP device data */
+struct ccp_device;
+struct ccp_vdata {
+	const unsigned int version;
+	const unsigned int dma_chan_attr;
+	void (*setup)(struct ccp_device *);
+	const struct ccp_actions *perform;
+	const unsigned int offset;
+	const unsigned int rsamax;
+};
+/* Structure to hold SP device data */
+struct sp_dev_vdata {
+	const unsigned int bar;
+
+	const struct ccp_vdata *ccp_vdata;
+	void *psp_vdata;
+};
+
+struct sp_device {
+	struct list_head entry;
+
+	struct device *dev;
+
+	struct sp_dev_vdata *dev_vdata;
+	unsigned int ord;
+	char name[SP_MAX_NAME_LEN];
+
+	/* Bus specific device information */
+	void *dev_specific;
+
+	/* I/O area used for device communication. */
+	void __iomem *io_map;
+
+	/* DMA caching attribute support */
+	unsigned int axcache;
+
+	bool irq_registered;
+	bool use_tasklet;
+
+	unsigned int ccp_irq;
+	irq_handler_t ccp_irq_handler;
+	void *ccp_irq_data;
+
+	unsigned int psp_irq;
+	irq_handler_t psp_irq_handler;
+	void *psp_irq_data;
+
+	void *ccp_data;
+	void *psp_data;
+};
+
+int sp_pci_init(void);
+void sp_pci_exit(void);
+
+int sp_platform_init(void);
+void sp_platform_exit(void);
+
+struct sp_device *sp_alloc_struct(struct device *dev);
+
+int sp_init(struct sp_device *sp);
+void sp_destroy(struct sp_device *sp);
+struct sp_device *sp_get_master(void);
+
+int sp_suspend(struct sp_device *sp, pm_message_t state);
+int sp_resume(struct sp_device *sp);
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_ccp_irq(struct sp_device *sp, void *data);
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_psp_irq(struct sp_device *sp, void *data);
+
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+
+int ccp_dev_init(struct sp_device *sp);
+void ccp_dev_destroy(struct sp_device *sp);
+
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state);
+int ccp_dev_resume(struct sp_device *sp);
+
+#else	/* !CONFIG_CRYPTO_DEV_SP_CCP */
+
+static inline int ccp_dev_init(struct sp_device *sp)
+{
+	return 0;
+}
+static inline void ccp_dev_destroy(struct sp_device *sp) { }
+
+static inline int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+{
+	return 0;
+}
+static inline int ccp_dev_resume(struct sp_device *sp)
+{
+	return 0;
+}
+#endif	/* CONFIG_CRYPTO_DEV_SP_CCP */
+
+#endif
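As a usage note for the interrupt helpers declared above: a sub-device (here the CCP engine) registers a handler through sp_request_ccp_irq() and releases it with sp_free_ccp_irq(); whether the line is shared with the PSP is hidden behind those calls. A hedged sketch, in which the handler and the attach/detach wrappers are invented names rather than code from this patch set:

	#include <linux/interrupt.h>
	#include "sp-dev.h"

	/* Hypothetical CCP-side consumer of the sp_*_irq helpers. */
	static irqreturn_t example_ccp_isr(int irq, void *data)
	{
		struct sp_device *sp = data;

		/* Real code would acknowledge the engine and kick a tasklet. */
		(void)sp;
		return IRQ_HANDLED;
	}

	static int example_ccp_attach(struct sp_device *sp)
	{
		return sp_request_ccp_irq(sp, example_ccp_isr, sp->name, sp);
	}

	static void example_ccp_detach(struct sp_device *sp)
	{
		sp_free_ccp_irq(sp, sp);
	}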

+ 276 - 0
drivers/crypto/ccp/sp-pci.c

@@ -0,0 +1,276 @@
+/*
+ * AMD Secure Processor device driver
+ *
+ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+#define MSIX_VECTORS			2
+
+struct sp_pci {
+	int msix_count;
+	struct msix_entry msix_entry[MSIX_VECTORS];
+};
+
+static int sp_get_msix_irqs(struct sp_device *sp)
+{
+	struct sp_pci *sp_pci = sp->dev_specific;
+	struct device *dev = sp->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int v, ret;
+
+	for (v = 0; v < ARRAY_SIZE(sp_pci->msix_entry); v++)
+		sp_pci->msix_entry[v].entry = v;
+
+	ret = pci_enable_msix_range(pdev, sp_pci->msix_entry, 1, v);
+	if (ret < 0)
+		return ret;
+
+	sp_pci->msix_count = ret;
+	sp->use_tasklet = true;
+
+	sp->psp_irq = sp_pci->msix_entry[0].vector;
+	sp->ccp_irq = (sp_pci->msix_count > 1) ? sp_pci->msix_entry[1].vector
+					       : sp_pci->msix_entry[0].vector;
+	return 0;
+}
+
+static int sp_get_msi_irq(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = pci_enable_msi(pdev);
+	if (ret)
+		return ret;
+
+	sp->ccp_irq = pdev->irq;
+	sp->psp_irq = pdev->irq;
+
+	return 0;
+}
+
+static int sp_get_irqs(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	int ret;
+
+	ret = sp_get_msix_irqs(sp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI-X vectors, try MSI */
+	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+	ret = sp_get_msi_irq(sp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI interrupt */
+	dev_notice(dev, "could not enable MSI (%d)\n", ret);
+
+	return ret;
+}
+
+static void sp_free_irqs(struct sp_device *sp)
+{
+	struct sp_pci *sp_pci = sp->dev_specific;
+	struct device *dev = sp->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (sp_pci->msix_count)
+		pci_disable_msix(pdev);
+	else if (sp->psp_irq)
+		pci_disable_msi(pdev);
+
+	sp->ccp_irq = 0;
+	sp->psp_irq = 0;
+}
+
+static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct sp_device *sp;
+	struct sp_pci *sp_pci;
+	struct device *dev = &pdev->dev;
+	void __iomem * const *iomap_table;
+	int bar_mask;
+	int ret;
+
+	ret = -ENOMEM;
+	sp = sp_alloc_struct(dev);
+	if (!sp)
+		goto e_err;
+
+	sp_pci = devm_kzalloc(dev, sizeof(*sp_pci), GFP_KERNEL);
+	if (!sp_pci)
+		goto e_err;
+
+	sp->dev_specific = sp_pci;
+	sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data;
+	if (!sp->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ccp");
+	if (ret) {
+		dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	iomap_table = pcim_iomap_table(pdev);
+	if (!iomap_table) {
+		dev_err(dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	sp->io_map = iomap_table[sp->dev_vdata->bar];
+	if (!sp->io_map) {
+		dev_err(dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	ret = sp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
+	pci_set_master(pdev);
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+				ret);
+			goto e_err;
+		}
+	}
+
+	dev_set_drvdata(dev, sp);
+
+	ret = sp_init(sp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+static void sp_pci_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	if (!sp)
+		return;
+
+	sp_destroy(sp);
+
+	sp_free_irqs(sp);
+
+	dev_notice(dev, "disabled\n");
+}
+
+#ifdef CONFIG_PM
+static int sp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_suspend(sp, state);
+}
+
+static int sp_pci_resume(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_resume(sp);
+}
+#endif
+
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3,
+#endif
+	},
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5a,
+#endif
+	},
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5b,
+#endif
+	},
+};
+static const struct pci_device_id sp_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
+	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] },
+	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, sp_pci_table);
+
+static struct pci_driver sp_pci_driver = {
+	.name = "ccp",
+	.id_table = sp_pci_table,
+	.probe = sp_pci_probe,
+	.remove = sp_pci_remove,
+#ifdef CONFIG_PM
+	.suspend = sp_pci_suspend,
+	.resume = sp_pci_resume,
+#endif
+};
+
+int sp_pci_init(void)
+{
+	return pci_register_driver(&sp_pci_driver);
+}
+
+void sp_pci_exit(void)
+{
+	pci_unregister_driver(&sp_pci_driver);
+}
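The driver_data value of each sp_pci_table entry is simply the address of the matching sp_dev_vdata cast to kernel_ulong_t, which sp_pci_probe() casts straight back. A purely hypothetical example of extending the table (the 0x1234 device ID and the structure name are placeholders, not real hardware):

	/* Hypothetical: describe one more part that reuses the v5 CCP engine. */
	static const struct sp_dev_vdata dev_vdata_example = {
		.bar = 2,
	#ifdef CONFIG_CRYPTO_DEV_SP_CCP
		.ccp_vdata = &ccpv5b,
	#endif
	};

	/* The matching ID entry would sit before the terminating { 0, }:
	 *
	 *	{ PCI_VDEVICE(AMD, 0x1234), (kernel_ulong_t)&dev_vdata_example },
	 */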

+ 256 - 0
drivers/crypto/ccp/sp-platform.c

@@ -0,0 +1,256 @@
+/*
+ * AMD Secure Processor device driver
+ *
+ * Copyright (C) 2014,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/acpi.h>
+
+#include "ccp-dev.h"
+
+struct sp_platform {
+	int coherent;
+	unsigned int irq_count;
+};
+
+static const struct acpi_device_id sp_acpi_match[];
+static const struct of_device_id sp_of_match[];
+
+static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev)
+{
+#ifdef CONFIG_OF
+	const struct of_device_id *match;
+
+	match = of_match_node(sp_of_match, pdev->dev.of_node);
+	if (match && match->data)
+		return (struct sp_dev_vdata *)match->data;
+#endif
+	return NULL;
+}
+
+static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev)
+{
+#ifdef CONFIG_ACPI
+	const struct acpi_device_id *match;
+
+	match = acpi_match_device(sp_acpi_match, &pdev->dev);
+	if (match && match->driver_data)
+		return (struct sp_dev_vdata *)match->driver_data;
+#endif
+	return NULL;
+}
+
+static int sp_get_irqs(struct sp_device *sp)
+{
+	struct sp_platform *sp_platform = sp->dev_specific;
+	struct device *dev = sp->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	unsigned int i, count;
+	int ret;
+
+	for (i = 0, count = 0; i < pdev->num_resources; i++) {
+		struct resource *res = &pdev->resource[i];
+
+		if (resource_type(res) == IORESOURCE_IRQ)
+			count++;
+	}
+
+	sp_platform->irq_count = count;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_notice(dev, "unable to get IRQ (%d)\n", ret);
+		return ret;
+	}
+
+	sp->psp_irq = ret;
+	if (count == 1) {
+		sp->ccp_irq = ret;
+	} else {
+		ret = platform_get_irq(pdev, 1);
+		if (ret < 0) {
+			dev_notice(dev, "unable to get IRQ (%d)\n", ret);
+			return ret;
+		}
+
+		sp->ccp_irq = ret;
+	}
+
+	return 0;
+}
+
+static int sp_platform_probe(struct platform_device *pdev)
+{
+	struct sp_device *sp;
+	struct sp_platform *sp_platform;
+	struct device *dev = &pdev->dev;
+	enum dev_dma_attr attr;
+	struct resource *ior;
+	int ret;
+
+	ret = -ENOMEM;
+	sp = sp_alloc_struct(dev);
+	if (!sp)
+		goto e_err;
+
+	sp_platform = devm_kzalloc(dev, sizeof(*sp_platform), GFP_KERNEL);
+	if (!sp_platform)
+		goto e_err;
+
+	sp->dev_specific = sp_platform;
+	sp->dev_vdata = pdev->dev.of_node ? sp_get_of_version(pdev)
+					 : sp_get_acpi_version(pdev);
+	if (!sp->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sp->io_map = devm_ioremap_resource(dev, ior);
+	if (IS_ERR(sp->io_map)) {
+		ret = PTR_ERR(sp->io_map);
+		goto e_err;
+	}
+
+	attr = device_get_dma_attr(dev);
+	if (attr == DEV_DMA_NOT_SUPPORTED) {
+		dev_err(dev, "DMA is not supported");
+		goto e_err;
+	}
+
+	sp_platform->coherent = (attr == DEV_DMA_COHERENT);
+	if (sp_platform->coherent)
+		sp->axcache = CACHE_WB_NO_ALLOC;
+	else
+		sp->axcache = CACHE_NONE;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	ret = sp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
+	dev_set_drvdata(dev, sp);
+
+	ret = sp_init(sp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+static int sp_platform_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	sp_destroy(sp);
+
+	dev_notice(dev, "disabled\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int sp_platform_suspend(struct platform_device *pdev,
+				pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_suspend(sp, state);
+}
+
+static int sp_platform_resume(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_resume(sp);
+}
+#endif
+
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 0,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3_platform,
+#endif
+	},
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id sp_acpi_match[] = {
+	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id sp_of_match[] = {
+	{ .compatible = "amd,ccp-seattle-v1a",
+	  .data = (const void *)&dev_vdata[0] },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, sp_of_match);
+#endif
+
+static struct platform_driver sp_platform_driver = {
+	.driver = {
+		.name = "ccp",
+#ifdef CONFIG_ACPI
+		.acpi_match_table = sp_acpi_match,
+#endif
+#ifdef CONFIG_OF
+		.of_match_table = sp_of_match,
+#endif
+	},
+	.probe = sp_platform_probe,
+	.remove = sp_platform_remove,
+#ifdef CONFIG_PM
+	.suspend = sp_platform_suspend,
+	.resume = sp_platform_resume,
+#endif
+};
+
+int sp_platform_init(void)
+{
+	return platform_driver_register(&sp_platform_driver);
+}
+
+void sp_platform_exit(void)
+{
+	platform_driver_unregister(&sp_platform_driver);
+}

Some files were not shown because the diff is too large