
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.14:

  API:
   - Defer scompress scratch buffer allocation to first use.
   - Add __crypto_xor that takes separate src and dst operands (see the before/after sketch below this message).
   - Add ahash multiple registration interface (usage sketch follows the shortlog).
   - Revamped aead/skcipher algif code to fix async IO properly.

  Drivers:
   - Add non-SIMD fallback code path on ARM for SVE.
   - Add AMD Security Processor framework for ccp.
   - Add support for RSA in ccp.
   - Add XTS-AES-256 support for CCP version 5.
   - Add support for PRNG in sun4i-ss.
   - Add support for DPAA2 in caam.
   - Add ARTPEC crypto support.
   - Add Freescale RNGC hwrng support.
   - Add Microchip / Atmel ECC driver.
   - Add support for STM32 HASH module"
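
The __crypto_xor change called out above gives the XOR helper separate source and destination operands, exposed to drivers as crypto_xor_cpy() in <crypto/algapi.h>. It replaces the "copy, then XOR in place" pattern that recurs in the CTR hunks further down; a minimal before/after sketch of a caller (tdst, tsrc and tail named as in those hunks):

  /* before: copy the source, then XOR the keystream in place */
  if (tdst != tsrc)
          memcpy(tdst, tsrc, nbytes);
  crypto_xor(tdst, tail, nbytes);

  /* after: one pass computing tdst = tsrc ^ tail, no alias check */
  crypto_xor_cpy(tdst, tsrc, tail, nbytes);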

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
  crypto: af_alg - get_page upon reassignment to TX SGL
  crypto: cavium/nitrox - Fix an error handling path in 'nitrox_probe()'
  crypto: inside-secure - fix an error handling path in safexcel_probe()
  crypto: rockchip - Don't dequeue the request when device is busy
  crypto: cavium - add release_firmware to all return case
  crypto: sahara - constify platform_device_id
  MAINTAINERS: Add ARTPEC crypto maintainer
  crypto: axis - add ARTPEC-6/7 crypto accelerator driver
  crypto: hash - add crypto_(un)register_ahashes()
  dt-bindings: crypto: add ARTPEC crypto
  crypto: algif_aead - fix comment regarding memory layout
  crypto: ccp - use dma_mapping_error to check map error
  lib/mpi: fix build with clang
  crypto: sahara - Remove leftover from previous used spinlock
  crypto: sahara - Fix dma unmap direction
  crypto: af_alg - consolidation of duplicate code
  crypto: caam - Remove unused dentry members
  crypto: ccp - select CONFIG_CRYPTO_RSA
  crypto: ccp - avoid uninitialized variable warning
  crypto: serpent - improve __serpent_setkey with UBSAN
  ...
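
The crypto_(un)register_ahashes() pair from the shortlog above (added for the ARTPEC driver; see the crypto/ahash.c entry in the file list) registers an array of ahash algorithms in one call and unwinds on failure. A hedged usage sketch; my_hashes and the driver names are placeholders, not code from this series:

  /* my_hashes[] would hold the driver's struct ahash_alg definitions */
  static struct ahash_alg my_hashes[2];

  static int __init my_driver_init(void)
  {
          /* registers every entry; on error, entries registered so
           * far are unregistered before the error is returned */
          return crypto_register_ahashes(my_hashes, ARRAY_SIZE(my_hashes));
  }

  static void __exit my_driver_exit(void)
  {
          crypto_unregister_ahashes(my_hashes, ARRAY_SIZE(my_hashes));
  }
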
Linus Torvalds, 8 years ago (commit 80cee03bf1)
100 changed files with 9490 additions and 2865 deletions
    1. +16 -0  Documentation/devicetree/bindings/crypto/artpec6-crypto.txt
    2. +13 -0  Documentation/devicetree/bindings/crypto/atmel-crypto.txt
    3. +30 -0  Documentation/devicetree/bindings/crypto/st,stm32-hash.txt
    4. +21 -0  Documentation/devicetree/bindings/rng/imx-rngc.txt
    5. +7 -0  MAINTAINERS
    6. +3 -2  arch/arm/crypto/Kconfig
    7. +1 -3  arch/arm/crypto/aes-ce-glue.c
    8. +65 -23  arch/arm/crypto/aes-cipher-core.S
    9. +2 -3  arch/arm/crypto/aes-neonbs-glue.c
   10. +191 -43  arch/arm/crypto/ghash-ce-core.S
   11. +21 -3  arch/arm/crypto/ghash-ce-glue.c
   12. +16 -6  arch/arm64/crypto/Kconfig
   13. +15 -15  arch/arm64/crypto/aes-ce-ccm-core.S
   14. +139 -35  arch/arm64/crypto/aes-ce-ccm-glue.c
   15. +33 -22  arch/arm64/crypto/aes-ce-cipher.c
   16. +6 -6  arch/arm64/crypto/aes-ce.S
   17. +107 -45  arch/arm64/crypto/aes-cipher-core.S
   18. +53 -0  arch/arm64/crypto/aes-ctr-fallback.h
   19. +45 -18  arch/arm64/crypto/aes-glue.c
   20. +45 -8  arch/arm64/crypto/aes-neonbs-glue.c
   21. +3 -2  arch/arm64/crypto/chacha20-neon-glue.c
   22. +6 -5  arch/arm64/crypto/crc32-ce-glue.c
   23. +9 -4  arch/arm64/crypto/crct10dif-ce-glue.c
   24. +383 -18  arch/arm64/crypto/ghash-ce-core.S
   25. +482 -35  arch/arm64/crypto/ghash-ce-glue.c
   26. +14 -4  arch/arm64/crypto/sha1-ce-glue.c
   27. +26 -4  arch/arm64/crypto/sha2-ce-glue.c
   28. +1 -0  arch/arm64/crypto/sha256-glue.c
   29. +1 -2  arch/sparc/crypto/aes_glue.c
   30. +2 -2  arch/x86/crypto/aesni-intel_glue.c
   31. +1 -2  arch/x86/crypto/blowfish_glue.c
   32. +1 -2  arch/x86/crypto/cast5_avx_glue.c
   33. +1 -2  arch/x86/crypto/des3_ede_glue.c
   34. +2 -0  crypto/Kconfig
   35. +691 -0  crypto/af_alg.c
   36. +29 -0  crypto/ahash.c
   37. +15 -10  crypto/algapi.c
   38. +250 -614  crypto/algif_aead.c
   39. +132 -702  crypto/algif_skcipher.c
   40. +1 -2  crypto/ctr.c
   41. +33 -18  crypto/ecdh.c
   42. +4 -8  crypto/pcbc.c
   43. +4 -2  crypto/rng.c
   44. +22 -33  crypto/scompress.c
   45. +41 -36  crypto/serpent_generic.c
   46. +4 -4  crypto/tcrypt.c
   47. +16 -4  drivers/char/hw_random/Kconfig
   48. +1 -0  drivers/char/hw_random/Makefile
   49. +36 -6  drivers/char/hw_random/core.c
   50. +331 -0  drivers/char/hw_random/imx-rngc.c
   51. +46 -3  drivers/crypto/Kconfig
   52. +3 -1  drivers/crypto/Makefile
   53. +781 -0  drivers/crypto/atmel-ecc.c
   54. +128 -0  drivers/crypto/atmel-ecc.h
   55. +1 -1  drivers/crypto/atmel-sha.c
   56. +1 -1  drivers/crypto/atmel-tdes.c
   57. +1 -0  drivers/crypto/axis/Makefile
   58. +3192 -0  drivers/crypto/axis/artpec6_crypto.c
   59. +54 -60  drivers/crypto/bcm/cipher.c
   60. +7 -6  drivers/crypto/bcm/cipher.h
   61. +15 -51  drivers/crypto/caam/caamalg.c
   62. +2 -3  drivers/crypto/caam/caamalg_desc.c
   63. +47 -8  drivers/crypto/caam/caamalg_qi.c
   64. +3 -4  drivers/crypto/caam/caamhash.c
   65. +1 -5  drivers/crypto/caam/caamrng.c
   66. +59 -68  drivers/crypto/caam/ctrl.c
   67. +2 -0  drivers/crypto/caam/ctrl.h
   68. +40 -0  drivers/crypto/caam/error.c
   69. +4 -0  drivers/crypto/caam/error.h
   70. +0 -11  drivers/crypto/caam/intern.h
   71. +6 -1  drivers/crypto/caam/jr.c
   72. +12 -18  drivers/crypto/caam/qi.c
   73. +3 -0  drivers/crypto/caam/qi.h
   74. +1 -0  drivers/crypto/caam/regs.h
   75. +81 -0  drivers/crypto/caam/sg_sw_qm2.h
   76. +25 -18  drivers/crypto/caam/sg_sw_sec4.h
   77. +9 -4  drivers/crypto/cavium/cpt/cptpf_main.c
   78. +3 -1  drivers/crypto/cavium/nitrox/nitrox_main.c
   79. +15 -7  drivers/crypto/ccp/Kconfig
   80. +4 -3  drivers/crypto/ccp/Makefile
   81. +1 -1  drivers/crypto/ccp/ccp-crypto-aes-galois.c
   82. +53 -43  drivers/crypto/ccp/ccp-crypto-aes-xts.c
   83. +1 -1  drivers/crypto/ccp/ccp-crypto-des3.c
   84. +20 -1  drivers/crypto/ccp/ccp-crypto-main.c
   85. +299 -0  drivers/crypto/ccp/ccp-crypto-rsa.c
   86. +1 -1  drivers/crypto/ccp/ccp-crypto-sha.c
   87. +34 -2  drivers/crypto/ccp/ccp-crypto.h
   88. +10 -5  drivers/crypto/ccp/ccp-debugfs.c
   89. +13 -7  drivers/crypto/ccp/ccp-dev-v3.c
   90. +14 -14  drivers/crypto/ccp/ccp-dev-v5.c
   91. +84 -50  drivers/crypto/ccp/ccp-dev.c
   92. +9 -21  drivers/crypto/ccp/ccp-dev.h
   93. +1 -1  drivers/crypto/ccp/ccp-dmaengine.c
   94. +91 -42  drivers/crypto/ccp/ccp-ops.c
   95. +0 -356  drivers/crypto/ccp/ccp-pci.c
   96. +0 -293  drivers/crypto/ccp/ccp-platform.c
   97. +277 -0  drivers/crypto/ccp/sp-dev.c
   98. +133 -0  drivers/crypto/ccp/sp-dev.h
   99. +276 -0  drivers/crypto/ccp/sp-pci.c
  100. +256 -0  drivers/crypto/ccp/sp-platform.c

+ 16 - 0
Documentation/devicetree/bindings/crypto/artpec6-crypto.txt

@@ -0,0 +1,16 @@
+Axis crypto engine with PDMA interface.
+
+Required properties:
+- compatible : Should be one of the following strings:
+	"axis,artpec6-crypto" for the version in the Axis ARTPEC-6 SoC
+	"axis,artpec7-crypto" for the version in the Axis ARTPEC-7 SoC.
+- reg: Base address and size for the PDMA register area.
+- interrupts: Interrupt handle for the PDMA interrupt line.
+
+Example:
+
+crypto@f4264000 {
+	compatible = "axis,artpec6-crypto";
+	reg = <0xf4264000 0x1000>;
+	interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+};

+ 13 - 0
Documentation/devicetree/bindings/crypto/atmel-crypto.txt

@@ -66,3 +66,16 @@ sha@f8034000 {
 	dmas = <&dma1 2 17>;
 	dma-names = "tx";
 };
+
+* Elliptic Curve Cryptography (I2C)
+
+Required properties:
+- compatible : must be "atmel,atecc508a".
+- reg: I2C bus address of the device.
+- clock-frequency: must be present in the i2c controller node.
+
+Example:
+atecc508a@C0 {
+	compatible = "atmel,atecc508a";
+	reg = <0xC0>;
+};

+ 30 - 0
Documentation/devicetree/bindings/crypto/st,stm32-hash.txt

@@ -0,0 +1,30 @@
+* STMicroelectronics STM32 HASH
+
+Required properties:
+- compatible: Should contain entries for this and backward compatible
+  HASH versions:
+  - "st,stm32f456-hash" for stm32 F456.
+  - "st,stm32f756-hash" for stm32 F756.
+- reg: The address and length of the peripheral registers space
+- interrupts: the interrupt specifier for the HASH
+- clocks: The input clock of the HASH instance
+
+Optional properties:
+- resets: The input reset of the HASH instance
+- dmas: DMA specifiers for the HASH. See the DMA client binding,
+	 Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request name. Should be "in" if a dma is present.
+- dma-maxburst: Set number of maximum dma burst supported
+
+Example:
+
+hash1: hash@50060400 {
+	compatible = "st,stm32f756-hash";
+	reg = <0x50060400 0x400>;
+	interrupts = <80>;
+	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(HASH)>;
+	resets = <&rcc STM32F7_AHB2_RESET(HASH)>;
+	dmas = <&dma2 7 2 0x400 0x0>;
+	dma-names = "in";
+	dma-maxburst = <0>;
+};

+ 21 - 0
Documentation/devicetree/bindings/rng/imx-rngc.txt

@@ -0,0 +1,21 @@
+Freescale RNGC (Random Number Generator Version C)
+
+The driver also supports version B, which is mostly compatible
+to version C.
+
+Required properties:
+- compatible : should be one of
+               "fsl,imx25-rngb"
+               "fsl,imx35-rngc"
+- reg : offset and length of the register set of this block
+- interrupts : the interrupt number for the RNGC block
+- clocks : the RNGC clk source
+
+Example:
+
+rng@53fb0000 {
+	compatible = "fsl,imx25-rngb";
+	reg = <0x53fb0000 0x4000>;
+	interrupts = <22>;
+	clocks = <&trng_clk>;
+};

+ 7 - 0
MAINTAINERS

@@ -1162,6 +1162,7 @@ L:	linux-arm-kernel@axis.com
 F:	arch/arm/mach-artpec
 F:	arch/arm/boot/dts/artpec6*
 F:	drivers/clk/axis
+F:	drivers/crypto/axis
 F:	drivers/pinctrl/pinctrl-artpec*
 F:	Documentation/devicetree/bindings/pinctrl/axis,artpec6-pinctrl.txt
 
@@ -8770,6 +8771,12 @@ F:	drivers/dma/at_hdmac.c
 F:	drivers/dma/at_hdmac_regs.h
 F:	include/linux/platform_data/dma-atmel.h
 
+MICROCHIP / ATMEL ECC DRIVER
+M:	Tudor Ambarus <tudor.ambarus@microchip.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/atmel-ecc.*
+
 MICROCHIP / ATMEL ISC DRIVER
 M:	Songjun Wu <songjun.wu@microchip.com>
 L:	linux-media@vger.kernel.org

+ 3 - 2
arch/arm/crypto/Kconfig

@@ -94,14 +94,15 @@ config CRYPTO_AES_ARM_CE
 	  ARMv8 Crypto Extensions
 
 config CRYPTO_GHASH_ARM_CE
-	tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+	tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
 	help
 	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
-	  that is part of the ARMv8 Crypto Extensions
+	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
+	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
 
 config CRYPTO_CRCT10DIF_ARM_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"

+ 1 - 3
arch/arm/crypto/aes-ce-glue.c

@@ -285,9 +285,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();

+ 65 - 23
arch/arm/crypto/aes-cipher-core.S

@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/cache.h>
 
 	.text
 	.align		5
@@ -32,19 +33,19 @@
 	.endif
 	.endm
 
-	.macro		__load, out, in, idx
+	.macro		__load, out, in, idx, sz, op
 	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
-	ldr		\out, [ttab, \in, lsr #(8 * \idx) - 2]
+	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
 	.else
-	ldr		\out, [ttab, \in, lsl #2]
+	ldr\op		\out, [ttab, \in, lsl #\sz]
 	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
 	__select	\out0, \in0, 0
 	__select	t0, \in1, 1
-	__load		\out0, \out0, 0
-	__load		t0, t0, 1
+	__load		\out0, \out0, 0, \sz, \op
+	__load		t0, t0, 1, \sz, \op
 
 	.if		\enc
 	__select	\out1, \in1, 0
@@ -53,10 +54,10 @@
 	__select	\out1, \in3, 0
 	__select	t1, \in0, 1
 	.endif
-	__load		\out1, \out1, 0
+	__load		\out1, \out1, 0, \sz, \op
 	__select	t2, \in2, 2
-	__load		t1, t1, 1
-	__load		t2, t2, 2
+	__load		t1, t1, 1, \sz, \op
+	__load		t2, t2, 2, \sz, \op
 
 	eor		\out0, \out0, t0, ror #24
 
@@ -68,9 +69,9 @@
 	__select	\t3, \in1, 2
 	__select	\t4, \in2, 3
 	.endif
-	__load		\t3, \t3, 2
-	__load		t0, t0, 3
-	__load		\t4, \t4, 3
+	__load		\t3, \t3, 2, \sz, \op
+	__load		t0, t0, 3, \sz, \op
+	__load		\t4, \t4, 3, \sz, \op
 
 	eor		\out1, \out1, t1, ror #24
 	eor		\out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@
 	eor		\out1, \out1, t2
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
 	.macro		__rev, out, in
@@ -114,7 +115,7 @@
 	.endif
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
+	.macro		do_crypt, round, ttab, ltab, bsz
 	push		{r3-r11, lr}
 
 	ldr		r4, [in]
@@ -146,9 +147,12 @@
 
 1:	subs		rounds, rounds, #4
 	\round		r8, r9, r10, r11, r4, r5, r6, r7
-	__adrl		ttab, \ltab, ls
+	bls		2f
 	\round		r4, r5, r6, r7, r8, r9, r10, r11
-	bhi		0b
+	b		0b
+
+2:	__adrl		ttab, \ltab
+	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	__rev		r4, r4
@@ -170,10 +174,48 @@
 	.ltorg
 	.endm
 
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
 ENTRY(__aes_arm_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm_encrypt)
 
+	.align		5
 ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
 ENDPROC(__aes_arm_decrypt)

+ 2 - 3
arch/arm/crypto/aes-neonbs-glue.c

@@ -221,9 +221,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;

+ 191 - 43
arch/arm/crypto/ghash-ce-core.S

@@ -1,7 +1,7 @@
 /*
- * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
  *
- * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -12,40 +12,162 @@
 #include <asm/assembler.h>
 
 	SHASH		.req	q0
-	SHASH2		.req	q1
-	T1		.req	q2
-	T2		.req	q3
-	MASK		.req	q4
-	XL		.req	q5
-	XM		.req	q6
-	XH		.req	q7
-	IN1		.req	q7
+	T1		.req	q1
+	XL		.req	q2
+	XM		.req	q3
+	XH		.req	q4
+	IN1		.req	q4
 
 	SHASH_L		.req	d0
 	SHASH_H		.req	d1
-	SHASH2_L	.req	d2
-	T1_L		.req	d4
-	MASK_L		.req	d8
-	XL_L		.req	d10
-	XL_H		.req	d11
-	XM_L		.req	d12
-	XM_H		.req	d13
-	XH_L		.req	d14
+	T1_L		.req	d2
+	T1_H		.req	d3
+	XL_L		.req	d4
+	XL_H		.req	d5
+	XM_L		.req	d6
+	XM_H		.req	d7
+	XH_L		.req	d8
+
+	t0l		.req	d10
+	t0h		.req	d11
+	t1l		.req	d12
+	t1h		.req	d13
+	t2l		.req	d14
+	t2h		.req	d15
+	t3l		.req	d16
+	t3h		.req	d17
+	t4l		.req	d18
+	t4h		.req	d19
+
+	t0q		.req	q5
+	t1q		.req	q6
+	t2q		.req	q7
+	t3q		.req	q8
+	t4q		.req	q9
+	T2		.req	q9
+
+	s1l		.req	d20
+	s1h		.req	d21
+	s2l		.req	d22
+	s2h		.req	d23
+	s3l		.req	d24
+	s3h		.req	d25
+	s4l		.req	d26
+	s4h		.req	d27
+
+	MASK		.req	d28
+	SHASH2_p8	.req	d28
+
+	k16		.req	d29
+	k32		.req	d30
+	k48		.req	d31
+	SHASH2_p64	.req	d31
 
 	.text
 	.fpu		crypto-neon-fp-armv8
 
+	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
+	vmull.p64	\rd, \rn, \rm
+	.endm
+
 	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
+	 * This implementation of 64x64 -> 128 bit polynomial multiplication
+	 * using vmull.p8 instructions (8x8 -> 16) is taken from the paper
+	 * "Fast Software Polynomial Multiplication on ARM Processors Using
+	 * the NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and
+	 * Ricardo Dahab (https://hal.inria.fr/hal-01506572)
+	 *
+	 * It has been slightly tweaked for in-order performance, and to allow
+	 * 'rq' to overlap with 'ad' or 'bd'.
 	 */
-ENTRY(pmull_ghash_update)
-	vld1.64		{SHASH}, [r3]
+	.macro		__pmull_p8, rq, ad, bd, b1=t4l, b2=t3l, b3=t4l, b4=t3l
+	vext.8		t0l, \ad, \ad, #1	@ A1
+	.ifc		\b1, t4l
+	vext.8		t4l, \bd, \bd, #1	@ B1
+	.endif
+	vmull.p8	t0q, t0l, \bd		@ F = A1*B
+	vext.8		t1l, \ad, \ad, #2	@ A2
+	vmull.p8	t4q, \ad, \b1		@ E = A*B1
+	.ifc		\b2, t3l
+	vext.8		t3l, \bd, \bd, #2	@ B2
+	.endif
+	vmull.p8	t1q, t1l, \bd		@ H = A2*B
+	vext.8		t2l, \ad, \ad, #3	@ A3
+	vmull.p8	t3q, \ad, \b2		@ G = A*B2
+	veor		t0q, t0q, t4q		@ L = E + F
+	.ifc		\b3, t4l
+	vext.8		t4l, \bd, \bd, #3	@ B3
+	.endif
+	vmull.p8	t2q, t2l, \bd		@ J = A3*B
+	veor		t0l, t0l, t0h		@ t0 = (L) (P0 + P1) << 8
+	veor		t1q, t1q, t3q		@ M = G + H
+	.ifc		\b4, t3l
+	vext.8		t3l, \bd, \bd, #4	@ B4
+	.endif
+	vmull.p8	t4q, \ad, \b3		@ I = A*B3
+	veor		t1l, t1l, t1h		@ t1 = (M) (P2 + P3) << 16
+	vmull.p8	t3q, \ad, \b4		@ K = A*B4
+	vand		t0h, t0h, k48
+	vand		t1h, t1h, k32
+	veor		t2q, t2q, t4q		@ N = I + J
+	veor		t0l, t0l, t0h
+	veor		t1l, t1l, t1h
+	veor		t2l, t2l, t2h		@ t2 = (N) (P4 + P5) << 24
+	vand		t2h, t2h, k16
+	veor		t3l, t3l, t3h		@ t3 = (K) (P6 + P7) << 32
+	vmov.i64	t3h, #0
+	vext.8		t0q, t0q, t0q, #15
+	veor		t2l, t2l, t2h
+	vext.8		t1q, t1q, t1q, #14
+	vmull.p8	\rq, \ad, \bd		@ D = A*B
+	vext.8		t2q, t2q, t2q, #13
+	vext.8		t3q, t3q, t3q, #12
+	veor		t0q, t0q, t1q
+	veor		t2q, t2q, t3q
+	veor		\rq, \rq, t0q
+	veor		\rq, \rq, t2q
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	vmull.p64	T1, XL_L, MASK
+
+	veor		XH_L, XH_L, XM_H
+	vext.8		T1, T1, T1, #8
+	veor		XL_H, XL_H, XM_L
+	veor		T1, T1, XL
+
+	vmull.p64	XL, T1_H, MASK
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	veor		XL_H, XL_H, XM_L
+	veor		XH_L, XH_L, XM_H
+
+	vshl.i64	T1, XL, #57
+	vshl.i64	T2, XL, #62
+	veor		T1, T1, T2
+	vshl.i64	T2, XL, #63
+	veor		T1, T1, T2
+	veor		XL_H, XL_H, T1_L
+	veor		XH_L, XH_L, T1_H
+
+	vshr.u64	T1, XL, #1
+	veor		XH, XH, XL
+	veor		XL, XL, T1
+	vshr.u64	T1, T1, #6
+	vshr.u64	XL, XL, #1
+	.endm
+
+	.macro		ghash_update, pn
 	vld1.64		{XL}, [r1]
-	vmov.i8		MASK, #0xe1
-	vext.8		SHASH2, SHASH, SHASH, #8
-	vshl.u64	MASK, MASK, #57
-	veor		SHASH2, SHASH2, SHASH
 
 	/* do the head block first, if supplied */
 	ldr		ip, [sp]
@@ -62,33 +184,59 @@ ENTRY(pmull_ghash_update)
 #ifndef CONFIG_CPU_BIG_ENDIAN
 	vrev64.8	T1, T1
 #endif
-	vext.8		T2, XL, XL, #8
 	vext.8		IN1, T1, T1, #8
-	veor		T1, T1, T2
+	veor		T1_L, T1_L, XL_H
 	veor		XL, XL, IN1
 
-	vmull.p64	XH, SHASH_H, XL_H		@ a1 * b1
+	__pmull_\pn	XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h	@ a1 * b1
 	veor		T1, T1, XL
-	vmull.p64	XL, SHASH_L, XL_L		@ a0 * b0
-	vmull.p64	XM, SHASH2_L, T1_L		@ (a1 + a0)(b1 + b0)
+	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
+	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)
 
-	vext.8		T1, XL, XH, #8
-	veor		T2, XL, XH
+	veor		T1, XL, XH
 	veor		XM, XM, T1
-	veor		XM, XM, T2
-	vmull.p64	T2, XL_L, MASK_L
 
-	vmov		XH_L, XM_H
-	vmov		XM_H, XL_L
+	__pmull_reduce_\pn
 
-	veor		XL, XM, T2
-	vext.8		T2, XL, XL, #8
-	vmull.p64	XL, XL_L, MASK_L
-	veor		T2, T2, XH
-	veor		XL, XL, T2
+	veor		T1, T1, XH
+	veor		XL, XL, T1
 
 	bne		0b
 
 	vst1.64		{XL}, [r1]
 	bx		lr
-ENDPROC(pmull_ghash_update)
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p64, SHASH_L, SHASH_H
+
+	vmov.i8		MASK, #0xe1
+	vshl.u64	MASK, MASK, #57
+
+	ghash_update	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	vld1.64		{SHASH}, [r3]
+	veor		SHASH2_p8, SHASH_L, SHASH_H
+
+	vext.8		s1l, SHASH_L, SHASH_L, #1
+	vext.8		s2l, SHASH_L, SHASH_L, #2
+	vext.8		s3l, SHASH_L, SHASH_L, #3
+	vext.8		s4l, SHASH_L, SHASH_L, #4
+	vext.8		s1h, SHASH_H, SHASH_H, #1
+	vext.8		s2h, SHASH_H, SHASH_H, #2
+	vext.8		s3h, SHASH_H, SHASH_H, #3
+	vext.8		s4h, SHASH_H, SHASH_H, #4
+
+	vmov.i64	k16, #0xffff
+	vmov.i64	k32, #0xffffffff
+	vmov.i64	k48, #0xffffffffffff
+
+	ghash_update	p8
+ENDPROC(pmull_ghash_update_p8)
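
The __pmull_p8 macro above synthesizes a 64x64->128-bit carry-less multiply from the 8x8->16-bit products that vmull.p8 provides, following the Camara/Gouvea/Lopez/Dahab paper cited in the comment. As a plain-C reference model of the underlying arithmetic only (not the NEON scheduling, which computes eight byte products per instruction and folds the shifted partial products with vext/veor), assuming a compiler with unsigned __int128:

  #include <stdint.h>

  /* 8x8 -> 16 bit carry-less multiply: one lane of vmull.p8 */
  static uint16_t clmul8(uint8_t a, uint8_t b)
  {
          uint16_t r = 0;

          for (int i = 0; i < 8; i++)
                  if (b & (1 << i))
                          r ^= (uint16_t)a << i;
          return r;
  }

  /* 64x64 -> 128 bit carry-less multiply, schoolbook over bytes;
   * addition in GF(2) is XOR, so each byte product is XORed in at
   * its byte offset */
  static void clmul64(uint64_t a, uint64_t b, uint64_t res[2])
  {
          unsigned __int128 acc = 0;

          for (int i = 0; i < 8; i++)
                  for (int j = 0; j < 8; j++)
                          acc ^= (unsigned __int128)clmul8(a >> (8 * i),
                                                           b >> (8 * j))
                                 << (8 * (i + j));
          res[0] = (uint64_t)acc;
          res[1] = (uint64_t)(acc >> 64);
  }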

+ 21 - 3
arch/arm/crypto/ghash-ce-glue.c

@@ -22,6 +22,7 @@
 MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
@@ -41,8 +42,17 @@ struct ghash_async_ctx {
 	struct cryptd_ahash *cryptd_tfm;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -312,6 +322,14 @@ static int __init ghash_ce_mod_init(void)
 {
 	int err;
 
+	if (!(elf_hwcap & HWCAP_NEON))
+		return -ENODEV;
+
+	if (elf_hwcap2 & HWCAP2_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
 		return err;
@@ -332,5 +350,5 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);
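
Note the init-path change at the end of this hunk: module_cpu_feature_match(PMULL, ...) would have kept the module from loading on CPUs that have NEON but lack the PMULL extension, so it is replaced by a plain module_init() with an explicit HWCAP_NEON check, and the vmull.p64 vs vmull.p8 variant is selected once at load time through the pmull_ghash_update function pointer.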

+ 16 - 6
arch/arm64/crypto/Kconfig

@@ -18,18 +18,23 @@ config CRYPTO_SHA512_ARM64
 
 config CRYPTO_SHA1_ARM64_CE
 	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA1
 
 config CRYPTO_SHA2_ARM64_CE
 	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_SHA256_ARM64
 
 config CRYPTO_GHASH_ARM64_CE
-	tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_GF128MUL
+	select CRYPTO_AES
+	select CRYPTO_AES_ARM64
 
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -49,25 +54,29 @@ config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM64
 
 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AEAD
 
 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
-	depends on ARM64 && KERNEL_MODE_NEON
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AES
 	select CRYPTO_SIMD
 
@@ -82,6 +91,7 @@ config CRYPTO_AES_ARM64_BS
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
+	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
 
 endif

+ 15 - 15
arch/arm64/crypto/aes-ce-ccm-core.S

@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.16b}, [x4]			/* load first round key */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
+4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
+5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.16b}, [x6], #16		/* load next round key */
+	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
+1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
+2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
+3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.16b}, [x3]			/* load first round key */
+	ld1	{v3.4s}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
+3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
+4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.16b}, [x10], #16		/* load next round key */
+	ld1	{v5.4s}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b

+ 139 - 35
arch/arm64/crypto/aes-ce-ccm-glue.c

@@ -1,7 +1,7 @@
 /*
  * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
@@ -44,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 				 u32 rounds);
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
@@ -103,7 +106,45 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 	return 0;
 }
 
-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
+			   u32 abytes, u32 *macp, bool use_neon)
+{
+	if (likely(use_neon)) {
+		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
+				     num_rounds(key));
+	} else {
+		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
+			int added = min(abytes, AES_BLOCK_SIZE - *macp);
+
+			crypto_xor(&mac[*macp], in, added);
+
+			*macp += added;
+			in += added;
+			abytes -= added;
+		}
+
+		while (abytes > AES_BLOCK_SIZE) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, AES_BLOCK_SIZE);
+
+			in += AES_BLOCK_SIZE;
+			abytes -= AES_BLOCK_SIZE;
+		}
+
+		if (abytes > 0) {
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
+			crypto_xor(mac, in, abytes);
+			*macp = abytes;
+		} else {
+			*macp = 0;
+		}
+	}
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+				   bool use_neon)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -122,8 +163,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 		ltag.len = 6;
 	}
 
-	ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
-			     num_rounds(ctx));
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
 	scatterwalk_start(&walk, req->src);
 
 	do {
@@ -135,8 +175,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc,
-				     num_rounds(ctx));
+		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
 		len -= n;
 
 		scatterwalk_unmap(p);
@@ -145,6 +184,56 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 	} while (len);
 }
 
+static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
+			      struct crypto_aes_ctx *ctx, bool enc)
+{
+	u8 buf[AES_BLOCK_SIZE];
+	int err = 0;
+
+	while (walk->nbytes) {
+		int blocks = walk->nbytes / AES_BLOCK_SIZE;
+		u32 tail = walk->nbytes % AES_BLOCK_SIZE;
+		u8 *dst = walk->dst.virt.addr;
+		u8 *src = walk->src.virt.addr;
+		u32 nbytes = walk->nbytes;
+
+		if (nbytes == walk->total && tail > 0) {
+			blocks++;
+			tail = 0;
+		}
+
+		do {
+			u32 bsize = AES_BLOCK_SIZE;
+
+			if (nbytes < AES_BLOCK_SIZE)
+				bsize = nbytes;
+
+			crypto_inc(walk->iv, AES_BLOCK_SIZE);
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+					    num_rounds(ctx));
+			__aes_arm64_encrypt(ctx->key_enc, mac, mac,
+					    num_rounds(ctx));
+			if (enc)
+				crypto_xor(mac, src, bsize);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			if (!enc)
+				crypto_xor(mac, dst, bsize);
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		} while (--blocks);
+
+		err = skcipher_walk_done(walk, tail);
+	}
+
+	if (!err) {
+		__aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+		__aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+		crypto_xor(mac, buf, AES_BLOCK_SIZE);
+	}
+	return err;
+}
+
 static int ccm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -153,39 +242,46 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
+	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_encrypt(&walk, req, true);
 
-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-
-		if (walk.nbytes == walk.total)
-			tail = 0;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			ce_aes_ccm_encrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
-	kernel_neon_end();
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
+	}
 	if (err)
 		return err;
 
@@ -205,38 +301,46 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
+	bool use_neon = may_use_simd();
 	int err;
 
 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;
 
-	kernel_neon_begin_partial(6);
+	if (likely(use_neon))
+		kernel_neon_begin();
 
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);
 
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
 	err = skcipher_walk_aead_decrypt(&walk, req, true);
 
-	while (walk.nbytes) {
-		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
+	if (likely(use_neon)) {
+		while (walk.nbytes) {
+			u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == walk.total)
-			tail = 0;
+			if (walk.nbytes == walk.total)
+				tail = 0;
 
-		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   walk.nbytes - tail, ctx->key_enc,
-				   num_rounds(ctx), mac, walk.iv);
+			ce_aes_ccm_decrypt(walk.dst.virt.addr,
+					   walk.src.virt.addr,
+					   walk.nbytes - tail, ctx->key_enc,
+					   num_rounds(ctx), mac, walk.iv);
 
-		err = skcipher_walk_done(&walk, tail);
-	}
-	if (!err)
-		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+			err = skcipher_walk_done(&walk, tail);
+		}
+		if (!err)
+			ce_aes_ccm_final(mac, buf, ctx->key_enc,
+					 num_rounds(ctx));
 
-	kernel_neon_end();
+		kernel_neon_end();
+	} else {
+		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
+	}
 
 	if (err)
 		return err;

+ 33 - 22
arch/arm64/crypto/aes-ce-cipher.c

@@ -1,7 +1,7 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,8 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -20,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
@@ -44,27 +49,32 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;
 
-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -89,27 +99,32 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	void *dummy0;
 	int dummy1;
 
-	kernel_neon_begin_partial(4);
+	if (!may_use_simd()) {
+		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+		return;
+	}
+
+	kernel_neon_begin();
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -165,20 +180,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
-	kernel_neon_begin_partial(2);
+	kernel_neon_begin();
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-			 rki[0];
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -210,9 +221,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.16b}, %[in]		;"
+		__asm__("ld1	{v0.4s}, %[in]		;"
 			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.16b}, %[out]	;"
+			"st1	{v1.4s}, %[out]	;"
 
 		:	[out]	"=Q"(key_dec[i])
 		:	[in]	"Q"(key_enc[j])

+ 6 - 6
arch/arm64/crypto/aes-ce.S

@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  *                                    Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -22,11 +22,11 @@
 	cmp		\rounds, #12
 	blo		2222f		/* 128 bits */
 	beq		1111f		/* 192 bits */
-	ld1		{v17.16b-v18.16b}, [\rk], #32
-1111:	ld1		{v19.16b-v20.16b}, [\rk], #32
-2222:	ld1		{v21.16b-v24.16b}, [\rk], #64
-	ld1		{v25.16b-v28.16b}, [\rk], #64
-	ld1		{v29.16b-v31.16b}, [\rk]
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
 	.endm
 
 	/* prepare for encryption with key in rk[] */

+ 107 - 45
arch/arm64/crypto/aes-cipher-core.S

@@ -10,6 +10,7 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 
 	.text
 
@@ -17,94 +18,155 @@
 	out		.req	x1
 	in		.req	x2
 	rounds		.req	x3
-	tt		.req	x4
-	lt		.req	x2
+	tt		.req	x2
 
-	.macro		__pair, enc, reg0, reg1, in0, in1e, in1d, shift
+	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	.ifc		\op\shift, b0
+	ubfiz		\reg0, \in0, #2, #8
+	ubfiz		\reg1, \in1e, #2, #8
+	.else
 	ubfx		\reg0, \in0, #\shift, #8
-	.if		\enc
 	ubfx		\reg1, \in1e, #\shift, #8
-	.else
-	ubfx		\reg1, \in1d, #\shift, #8
 	.endif
+
+	/*
+	 * AArch64 cannot do byte size indexed loads from a table containing
+	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
+	 * valid instruction. So perform the shift explicitly first for the
+	 * high bytes (the low byte is shifted implicitly by using ubfiz rather
+	 * than ubfx above)
+	 */
+	.ifnc		\op, b
 	ldr		\reg0, [tt, \reg0, uxtw #2]
 	ldr		\reg1, [tt, \reg1, uxtw #2]
+	.else
+	.if		\shift > 0
+	lsl		\reg0, \reg0, #2
+	lsl		\reg1, \reg1, #2
+	.endif
+	ldrb		\reg0, [tt, \reg0, uxtw]
+	ldrb		\reg1, [tt, \reg1, uxtw]
+	.endif
 	.endm
 
-	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
+	ubfx		\reg0, \in0, #\shift, #8
+	ubfx		\reg1, \in1d, #\shift, #8
+	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
+	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
+	.endm
+
+	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
 	ldp		\out0, \out1, [rk], #8
 
-	__pair		\enc, w13, w14, \in0, \in1, \in3, 0
-	__pair		\enc, w15, w16, \in1, \in2, \in0, 8
-	__pair		\enc, w17, w18, \in2, \in3, \in1, 16
-	__pair		\enc, \t0, \t1, \in3, \in0, \in2, 24
-
-	eor		\out0, \out0, w13
-	eor		\out1, \out1, w14
-	eor		\out0, \out0, w15, ror #24
-	eor		\out1, \out1, w16, ror #24
-	eor		\out0, \out0, w17, ror #16
-	eor		\out1, \out1, w18, ror #16
+	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
+	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
+	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
+	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24
+
+	eor		\out0, \out0, w12
+	eor		\out1, \out1, w13
+	eor		\out0, \out0, w14, ror #24
+	eor		\out1, \out1, w15, ror #24
+	eor		\out0, \out0, w16, ror #16
+	eor		\out1, \out1, w17, ror #16
 	eor		\out0, \out0, \t0, ror #8
 	eor		\out1, \out1, \t1, ror #8
 	.endm
 
-	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
-	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
 	.endm
 
-	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3
-	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
-	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
 	.endm
 
-	.macro		do_crypt, round, ttab, ltab
-	ldp		w5, w6, [in]
-	ldp		w7, w8, [in, #8]
-	ldp		w9, w10, [rk], #16
-	ldp		w11, w12, [rk, #-8]
+	.macro		do_crypt, round, ttab, ltab, bsz
+	ldp		w4, w5, [in]
+	ldp		w6, w7, [in, #8]
+	ldp		w8, w9, [rk], #16
+	ldp		w10, w11, [rk, #-8]
 
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
+	eor		w4, w4, w8
 	eor		w5, w5, w9
 	eor		w6, w6, w10
 	eor		w7, w7, w11
-	eor		w8, w8, w12
 
 	adr_l		tt, \ttab
-	adr_l		lt, \ltab
 
 	tbnz		rounds, #1, 1f
 
-0:	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
+0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	\round		w4, w5, w6, w7, w8, w9, w10, w11
 
 1:	subs		rounds, rounds, #4
-	\round		w9, w10, w11, w12, w5, w6, w7, w8
-	csel		tt, tt, lt, hi
-	\round		w5, w6, w7, w8, w9, w10, w11, w12
-	b.hi		0b
-
+	\round		w8, w9, w10, w11, w4, w5, w6, w7
+	b.ls		3f
+2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
+	b		0b
+3:	adr_l		tt, \ltab
+	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b
+
+CPU_BE(	rev		w4, w4		)
 CPU_BE(	rev		w5, w5		)
 CPU_BE(	rev		w6, w6		)
 CPU_BE(	rev		w7, w7		)
-CPU_BE(	rev		w8, w8		)
 
-	stp		w5, w6, [out]
-	stp		w7, w8, [out, #8]
+	stp		w4, w5, [out]
+	stp		w6, w7, [out, #8]
 	ret
 	.endm
 
-	.align		5
+	.align		L1_CACHE_SHIFT
+	.type		__aes_arm64_inverse_sbox, %object
+__aes_arm64_inverse_sbox:
+	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
+
 ENTRY(__aes_arm64_encrypt)
-	do_crypt	fround, crypto_ft_tab, crypto_fl_tab
+	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
 ENDPROC(__aes_arm64_encrypt)
 
 	.align		5
 ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, crypto_il_tab
+	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
 ENDPROC(__aes_arm64_decrypt)

+ 53 - 0
arch/arm64/crypto/aes-ctr-fallback.h

@@ -0,0 +1,53 @@
+/*
+ * Fallback for sync aes(ctr) in contexts where kernel mode NEON
+ * is not allowed
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
+					   struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+			tail = walk.nbytes % AES_BLOCK_SIZE;
+		}
+
+		do {
+			int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+			__aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
+					    6 + ctx->key_length / 4);
+			crypto_xor_cpy(dst, src, buf, bsize);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+			dst += AES_BLOCK_SIZE;
+			src += AES_BLOCK_SIZE;
+			nbytes -= AES_BLOCK_SIZE;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
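
This fallback header is consumed by the two glue files that follow: both aes-glue.c and aes-neonbs-glue.c gain a ctr_encrypt_sync() wrapper that calls aes_ctr_encrypt_fallback() whenever may_use_simd() reports that kernel-mode NEON is unavailable, and takes the NEON path otherwise.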

+ 45 - 18
arch/arm64/crypto/aes-glue.c

@@ -10,6 +10,7 @@
 
 #include <asm/neon.h>
 #include <asm/hwcap.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
@@ -19,6 +20,7 @@
 #include <crypto/xts.h>
 
 #include "aes-ce-setkey.h"
+#include "aes-ctr-fallback.h"
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
@@ -241,9 +243,7 @@ static int ctr_encrypt(struct skcipher_request *req)
 
 		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
 				blocks, walk.iv, first);
-		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, tail, nbytes);
+		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
@@ -251,6 +251,17 @@ static int ctr_encrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(ctx, req);
+
+	return ctr_encrypt(req);
+}
+
 static int xts_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -357,8 +368,8 @@ static struct skcipher_alg aes_algs[] = { {
 	.ivsize		= AES_BLOCK_SIZE,
 	.chunksize	= AES_BLOCK_SIZE,
 	.setkey		= skcipher_aes_setkey,
-	.encrypt	= ctr_encrypt,
-	.decrypt	= ctr_encrypt,
+	.encrypt	= ctr_encrypt_sync,
+	.decrypt	= ctr_encrypt_sync,
 }, {
 	.base = {
 		.cra_name		= "__xts(aes)",
@@ -460,11 +471,35 @@ static int mac_init(struct shash_desc *desc)
 	return 0;
 }
 
+static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
+			  u8 dg[], int enc_before, int enc_after)
+{
+	int rounds = 6 + ctx->key_length / 4;
+
+	if (may_use_simd()) {
+		kernel_neon_begin();
+		aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
+			       enc_after);
+		kernel_neon_end();
+	} else {
+		if (enc_before)
+			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+
+		while (blocks--) {
+			crypto_xor(dg, in, AES_BLOCK_SIZE);
+			in += AES_BLOCK_SIZE;
+
+			if (blocks || enc_after)
+				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
+						    rounds);
+		}
+	}
+}
+
 static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
 	while (len > 0) {
 		unsigned int l;
@@ -476,10 +511,8 @@ static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 
 			len %= AES_BLOCK_SIZE;
 
-			kernel_neon_begin();
-			aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
-				       ctx->dg, (ctx->len != 0), (len != 0));
-			kernel_neon_end();
+			mac_do_update(&tctx->key, p, blocks, ctx->dg,
+				      (ctx->len != 0), (len != 0));
 
 			p += blocks * AES_BLOCK_SIZE;
 
@@ -507,11 +540,8 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 
-	kernel_neon_begin();
-	aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 
@@ -522,7 +552,6 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 {
 	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
 	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
-	int rounds = 6 + tctx->key.key_length / 4;
 	u8 *consts = tctx->consts;
 
 	if (ctx->len != AES_BLOCK_SIZE) {
@@ -530,9 +559,7 @@ static int cmac_final(struct shash_desc *desc, u8 *out)
 		consts += AES_BLOCK_SIZE;
 	}
 
-	kernel_neon_begin();
-	aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
-	kernel_neon_end();
+	mac_do_update(&tctx->key, consts, 1, ctx->dg, 0, 1);
 
 	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
 

+ 45 - 8
arch/arm64/crypto/aes-neonbs-glue.c

@@ -1,7 +1,7 @@
 /*
  * Bit sliced AES using NEON instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,12 +9,15 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 
+#include "aes-ctr-fallback.h"
+
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
@@ -58,6 +61,11 @@ struct aesbs_cbc_ctx {
 	u32			enc[AES_MAX_KEYLENGTH_U32];
 };
 
+struct aesbs_ctr_ctx {
+	struct aesbs_ctx	key;		/* must be first member */
+	struct crypto_aes_ctx	fallback;
+};
+
 struct aesbs_xts_ctx {
 	struct aesbs_ctx	key;
 	u32			twkey[AES_MAX_KEYLENGTH_U32];
@@ -196,6 +204,25 @@ static int cbc_decrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+				 unsigned int key_len)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->key.rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -224,9 +251,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 
-			if (dst != src)
-				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
-			crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
+			crypto_xor_cpy(dst, src, final,
+				       walk.total % AES_BLOCK_SIZE);
 
 			err = skcipher_walk_done(&walk, 0);
 			break;
@@ -260,6 +286,17 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 	return aesbs_setkey(tfm, in_key, key_len);
 }
 
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	if (!may_use_simd())
+		return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+
+	return ctr_encrypt(req);
+}
+
 static int __xts_crypt(struct skcipher_request *req,
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]))
@@ -356,7 +393,7 @@ static struct skcipher_alg aes_algs[] = { {
 	.base.cra_driver_name	= "ctr-aes-neonbs",
 	.base.cra_priority	= 250 - 1,
 	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctr_ctx),
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= AES_MIN_KEY_SIZE,
@@ -364,9 +401,9 @@ static struct skcipher_alg aes_algs[] = { {
 	.chunksize		= AES_BLOCK_SIZE,
 	.walksize		= 8 * AES_BLOCK_SIZE,
 	.ivsize			= AES_BLOCK_SIZE,
-	.setkey			= aesbs_setkey,
-	.encrypt		= ctr_encrypt,
-	.decrypt		= ctr_encrypt,
+	.setkey			= aesbs_ctr_setkey_sync,
+	.encrypt		= ctr_encrypt_sync,
+	.decrypt		= ctr_encrypt_sync,
 }, {
 	.base.cra_name		= "__xts(aes)",
 	.base.cra_driver_name	= "__xts-aes-neonbs",

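The bit-sliced driver adds a twist to the same pattern: its NEON key schedule is stored in a bit-sliced layout that scalar code cannot consume, so aesbs_ctr_setkey_sync() above also keeps a conventionally expanded key purely for the fallback path. A hedged sketch of that dual-schedule context; the array sizes are illustrative, and the expand/convert helpers are empty stand-ins for crypto_aes_expand_key() and aesbs_convert_key():

#include <stdint.h>

struct ctr_ctx_sketch {
	uint32_t neon_rk[120];	/* bit-sliced layout, usable by NEON only */
	uint32_t plain_rk[60];	/* ordinary schedule for the scalar fallback */
	int rounds;
};

/* Stand-ins; the real helpers live in the kernel crypto layer. */
static void expand_plain(uint32_t *rk, const uint8_t *key, unsigned int len)
{
	(void)rk; (void)key; (void)len;
}

static void convert_bitsliced(uint32_t *dst, const uint32_t *src, int rounds)
{
	(void)dst; (void)src; (void)rounds;
}

static int ctr_setkey_sync_sketch(struct ctr_ctx_sketch *ctx,
				  const uint8_t *key, unsigned int key_len)
{
	ctx->rounds = 6 + key_len / 4;	/* 10/12/14 for 16/24/32-byte keys */

	/* Keep a plain schedule so !may_use_simd() requests still work... */
	expand_plain(ctx->plain_rk, key, key_len);
	/* ...and derive the bit-sliced schedule from it for the NEON path. */
	convert_bitsliced(ctx->neon_rk, ctx->plain_rk, ctx->rounds);
	return 0;
}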
+ 3 - 2
arch/arm64/crypto/chacha20-neon-glue.c

@@ -1,7 +1,7 @@
 /*
  * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
  *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
@@ -64,7 +65,7 @@ static int chacha20_neon(struct skcipher_request *req)
 	u32 state[16];
 	int err;
 
-	if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
+	if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
 		return crypto_chacha20_crypt(req);
 
 	err = skcipher_walk_virt(&walk, req, true);

+ 6 - 5
arch/arm64/crypto/crc32-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 
 #define PMULL_MIN_LEN		64L	/* minimum size of buffer
@@ -105,10 +106,10 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 	}
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32_pmull_le(data, l, *crc);
 		kernel_neon_end();
 
@@ -137,10 +138,10 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
 		length -= l;
 	}
 
-	if (length >= PMULL_MIN_LEN) {
+	if (length >= PMULL_MIN_LEN && may_use_simd()) {
 		l = round_down(length, SCALE_F);
 
-		kernel_neon_begin_partial(10);
+		kernel_neon_begin();
 		*crc = crc32c_pmull_le(data, l, *crc);
 		kernel_neon_end();
 

+ 9 - 4
arch/arm64/crypto/crct10dif-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
  *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,6 +18,7 @@
 #include <crypto/internal/hash.h>
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 
 #define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
 
@@ -48,9 +49,13 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
 	}
 
 	if (length > 0) {
-		kernel_neon_begin_partial(14);
-		*crc = crc_t10dif_pmull(*crc, data, length);
-		kernel_neon_end();
+		if (may_use_simd()) {
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull(*crc, data, length);
+			kernel_neon_end();
+		} else {
+			*crc = crc_t10dif_generic(*crc, data, length);
+		}
 	}
 
 	return 0;

+ 383 - 18
arch/arm64/crypto/ghash-ce-core.S

@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -11,31 +11,215 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-	SHASH	.req	v0
-	SHASH2	.req	v1
-	T1	.req	v2
-	T2	.req	v3
-	MASK	.req	v4
-	XL	.req	v5
-	XM	.req	v6
-	XH	.req	v7
-	IN1	.req	v7
+	SHASH		.req	v0
+	SHASH2		.req	v1
+	T1		.req	v2
+	T2		.req	v3
+	MASK		.req	v4
+	XL		.req	v5
+	XM		.req	v6
+	XH		.req	v7
+	IN1		.req	v7
+
+	k00_16		.req	v8
+	k32_48		.req	v9
+
+	t3		.req	v10
+	t4		.req	v11
+	t5		.req	v12
+	t6		.req	v13
+	t7		.req	v14
+	t8		.req	v15
+	t9		.req	v16
+
+	perm1		.req	v17
+	perm2		.req	v18
+	perm3		.req	v19
+
+	sh1		.req	v20
+	sh2		.req	v21
+	sh3		.req	v22
+	sh4		.req	v23
+
+	ss1		.req	v24
+	ss2		.req	v25
+	ss3		.req	v26
+	ss4		.req	v27
 
 	.text
 	.arch		armv8-a+crypto
 
-	/*
-	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-	 *			   struct ghash_key const *k, const char *head)
-	 */
-ENTRY(pmull_ghash_update)
+	.macro		__pmull_p64, rd, rn, rm
+	pmull		\rd\().1q, \rn\().1d, \rm\().1d
+	.endm
+
+	.macro		__pmull2_p64, rd, rn, rm
+	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
+	.endm
+
+	.macro		__pmull_p8, rq, ad, bd
+	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
+	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
+	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3
+
+	__pmull_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull2_p8, rq, ad, bd
+	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
+	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
+	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3
+
+	__pmull2_p8_\bd	\rq, \ad
+	.endm
+
+	.macro		__pmull_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_SHASH2, rq, ad
+	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
+	.endm
+
+	.macro		__pmull2_p8_SHASH, rq, ad
+	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
+	.endm
+
+	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
+	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
+	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
+	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
+	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
+	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
+	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
+	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
+	pmull\t		\rq\().8h, \ad, \bd			// D = A*B
+
+	eor		t3.16b, t3.16b, t4.16b			// L = E + F
+	eor		t5.16b, t5.16b, t6.16b			// M = G + H
+	eor		t7.16b, t7.16b, t8.16b			// N = I + J
+
+	uzp1		t4.2d, t3.2d, t5.2d
+	uzp2		t3.2d, t3.2d, t5.2d
+	uzp1		t6.2d, t7.2d, t9.2d
+	uzp2		t7.2d, t7.2d, t9.2d
+
+	// t3 = (L) (P0 + P1) << 8
+	// t5 = (M) (P2 + P3) << 16
+	eor		t4.16b, t4.16b, t3.16b
+	and		t3.16b, t3.16b, k32_48.16b
+
+	// t7 = (N) (P4 + P5) << 24
+	// t9 = (K) (P6 + P7) << 32
+	eor		t6.16b, t6.16b, t7.16b
+	and		t7.16b, t7.16b, k00_16.16b
+
+	eor		t4.16b, t4.16b, t3.16b
+	eor		t6.16b, t6.16b, t7.16b
+
+	zip2		t5.2d, t4.2d, t3.2d
+	zip1		t3.2d, t4.2d, t3.2d
+	zip2		t9.2d, t6.2d, t7.2d
+	zip1		t7.2d, t6.2d, t7.2d
+
+	ext		t3.16b, t3.16b, t3.16b, #15
+	ext		t5.16b, t5.16b, t5.16b, #14
+	ext		t7.16b, t7.16b, t7.16b, #13
+	ext		t9.16b, t9.16b, t9.16b, #12
+
+	eor		t3.16b, t3.16b, t5.16b
+	eor		t7.16b, t7.16b, t9.16b
+	eor		\rq\().16b, \rq\().16b, t3.16b
+	eor		\rq\().16b, \rq\().16b, t7.16b
+	.endm
+
+	.macro		__pmull_pre_p64
+	movi		MASK.16b, #0xe1
+	shl		MASK.2d, MASK.2d, #57
+	.endm
+
+	.macro		__pmull_pre_p8
+	// k00_16 := 0x0000000000000000_000000000000ffff
+	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
+	movi		k32_48.2d, #0xffffffff
+	mov		k32_48.h[2], k32_48.h[0]
+	ushr		k00_16.2d, k32_48.2d, #32
+
+	// prepare the permutation vectors
+	mov_q		x5, 0x080f0e0d0c0b0a09
+	movi		T1.8b, #8
+	dup		perm1.2d, x5
+	eor		perm1.16b, perm1.16b, T1.16b
+	ushr		perm2.2d, perm1.2d, #8
+	ushr		perm3.2d, perm1.2d, #16
+	ushr		T1.2d, perm1.2d, #24
+	sli		perm2.2d, perm1.2d, #56
+	sli		perm3.2d, perm1.2d, #48
+	sli		T1.2d, perm1.2d, #40
+
+	// precompute loop invariants
+	tbl		sh1.16b, {SHASH.16b}, perm1.16b
+	tbl		sh2.16b, {SHASH.16b}, perm2.16b
+	tbl		sh3.16b, {SHASH.16b}, perm3.16b
+	tbl		sh4.16b, {SHASH.16b}, T1.16b
+	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
+	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
+	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
+	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
+	.endm
+
+	//
+	// PMULL (64x64->128) based reduction for CPUs that can do
+	// it in a single instruction.
+	//
+	.macro		__pmull_reduce_p64
+	pmull		T2.1q, XL.1d, MASK.1d
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XH.d[0], XM.d[1]
+	mov		XM.d[1], XL.d[0]
+
+	eor		XL.16b, XM.16b, T2.16b
+	ext		T2.16b, XL.16b, XL.16b, #8
+	pmull		XL.1q, XL.1d, MASK.1d
+	.endm
+
+	//
+	// Alternative reduction for CPUs that lack support for the
+	// 64x64->128 PMULL instruction
+	//
+	.macro		__pmull_reduce_p8
+	eor		XM.16b, XM.16b, T1.16b
+
+	mov		XL.d[1], XM.d[0]
+	mov		XH.d[0], XM.d[1]
+
+	shl		T1.2d, XL.2d, #57
+	shl		T2.2d, XL.2d, #62
+	eor		T2.16b, T2.16b, T1.16b
+	shl		T1.2d, XL.2d, #63
+	eor		T2.16b, T2.16b, T1.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		T2.16b, T2.16b, T1.16b
+
+	mov		XL.d[1], T2.d[0]
+	mov		XH.d[0], T2.d[1]
+
+	ushr		T2.2d, XL.2d, #1
+	eor		XH.16b, XH.16b, XL.16b
+	eor		XL.16b, XL.16b, T2.16b
+	ushr		T2.2d, T2.2d, #6
+	ushr		XL.2d, XL.2d, #1
+	.endm
+
+	.macro		__pmull_ghash, pn
 	ld1		{SHASH.2d}, [x3]
 	ld1		{XL.2d}, [x1]
-	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
+	__pmull_pre_\pn
+
 	/* do the head block first, if supplied */
 	cbz		x4, 0f
 	ld1		{T1.2d}, [x4]
@@ -52,28 +236,209 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 	eor		T1.16b, T1.16b, T2.16b
 	eor		XL.16b, XL.16b, IN1.16b
 
+	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
+	eor		T1.16b, T1.16b, XL.16b
+	__pmull_\pn 	XL, XL, SHASH			// a0 * b0
+	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)
+
+	eor		T2.16b, XL.16b, XH.16b
+	ext		T1.16b, XL.16b, XH.16b, #8
+	eor		XM.16b, XM.16b, T2.16b
+
+	__pmull_reduce_\pn
+
+	eor		T2.16b, T2.16b, XH.16b
+	eor		XL.16b, XL.16b, T2.16b
+
+	cbnz		w0, 0b
+
+	st1		{XL.2d}, [x1]
+	ret
+	.endm
+
+	/*
+	 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+	 *			   struct ghash_key const *k, const char *head)
+	 */
+ENTRY(pmull_ghash_update_p64)
+	__pmull_ghash	p64
+ENDPROC(pmull_ghash_update_p64)
+
+ENTRY(pmull_ghash_update_p8)
+	__pmull_ghash	p8
+ENDPROC(pmull_ghash_update_p8)
+
+	KS		.req	v8
+	CTR		.req	v9
+	INP		.req	v10
+
+	.macro		load_round_keys, rounds, rk
+	cmp		\rounds, #12
+	blo		2222f		/* 128 bits */
+	beq		1111f		/* 192 bits */
+	ld1		{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
+	ld1		{v25.4s-v28.4s}, [\rk], #64
+	ld1		{v29.4s-v31.4s}, [\rk]
+	.endm
+
+	.macro		enc_round, state, key
+	aese		\state\().16b, \key\().16b
+	aesmc		\state\().16b, \state\().16b
+	.endm
+
+	.macro		enc_block, state, rounds
+	cmp		\rounds, #12
+	b.lo		2222f		/* 128 bits */
+	b.eq		1111f		/* 192 bits */
+	enc_round	\state, v17
+	enc_round	\state, v18
+1111:	enc_round	\state, v19
+	enc_round	\state, v20
+2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+	enc_round	\state, \key
+	.endr
+	aese		\state\().16b, v30.16b
+	eor		\state\().16b, \state\().16b, v31.16b
+	.endm
+
+	.macro		pmull_gcm_do_crypt, enc
+	ld1		{SHASH.2d}, [x4]
+	ld1		{XL.2d}, [x1]
+	ldr		x8, [x5, #8]			// load lower counter
+
+	movi		MASK.16b, #0xe1
+	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
+CPU_LE(	rev		x8, x8		)
+	shl		MASK.2d, MASK.2d, #57
+	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
+
+	.if		\enc == 1
+	ld1		{KS.16b}, [x7]
+	.endif
+
+0:	ld1		{CTR.8b}, [x5]			// load upper counter
+	ld1		{INP.16b}, [x3], #16
+	rev		x9, x8
+	add		x8, x8, #1
+	sub		w0, w0, #1
+	ins		CTR.d[1], x9			// set lower counter
+
+	.if		\enc == 1
+	eor		INP.16b, INP.16b, KS.16b	// encrypt input
+	st1		{INP.16b}, [x2], #16
+	.endif
+
+	rev64		T1.16b, INP.16b
+
+	cmp		w6, #12
+	b.ge		2f				// AES-192/256?
+
+1:	enc_round	CTR, v21
+
+	ext		T2.16b, XL.16b, XL.16b, #8
+	ext		IN1.16b, T1.16b, T1.16b, #8
+
+	enc_round	CTR, v22
+
+	eor		T1.16b, T1.16b, T2.16b
+	eor		XL.16b, XL.16b, IN1.16b
+
+	enc_round	CTR, v23
+
 	pmull2		XH.1q, SHASH.2d, XL.2d		// a1 * b1
 	eor		T1.16b, T1.16b, XL.16b
+
+	enc_round	CTR, v24
+
 	pmull		XL.1q, SHASH.1d, XL.1d		// a0 * b0
 	pmull		XM.1q, SHASH2.1d, T1.1d		// (a1 + a0)(b1 + b0)
 
+	enc_round	CTR, v25
+
 	ext		T1.16b, XL.16b, XH.16b, #8
 	eor		T2.16b, XL.16b, XH.16b
 	eor		XM.16b, XM.16b, T1.16b
+
+	enc_round	CTR, v26
+
 	eor		XM.16b, XM.16b, T2.16b
 	pmull		T2.1q, XL.1d, MASK.1d
 
+	enc_round	CTR, v27
+
 	mov		XH.d[0], XM.d[1]
 	mov		XM.d[1], XL.d[0]
 
+	enc_round	CTR, v28
+
 	eor		XL.16b, XM.16b, T2.16b
+
+	enc_round	CTR, v29
+
 	ext		T2.16b, XL.16b, XL.16b, #8
+
+	aese		CTR.16b, v30.16b
+
 	pmull		XL.1q, XL.1d, MASK.1d
 	eor		T2.16b, T2.16b, XH.16b
+
+	eor		KS.16b, CTR.16b, v31.16b
+
 	eor		XL.16b, XL.16b, T2.16b
 
+	.if		\enc == 0
+	eor		INP.16b, INP.16b, KS.16b
+	st1		{INP.16b}, [x2], #16
+	.endif
+
 	cbnz		w0, 0b
 
+CPU_LE(	rev		x8, x8		)
 	st1		{XL.2d}, [x1]
+	str		x8, [x5, #8]			// store lower counter
+
+	.if		\enc == 1
+	st1		{KS.16b}, [x7]
+	.endif
+
+	ret
+
+2:	b.eq		3f				// AES-192?
+	enc_round	CTR, v17
+	enc_round	CTR, v18
+3:	enc_round	CTR, v19
+	enc_round	CTR, v20
+	b		1b
+	.endm
+
+	/*
+	 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds, u8 ks[])
+	 */
+ENTRY(pmull_gcm_encrypt)
+	pmull_gcm_do_crypt	1
+ENDPROC(pmull_gcm_encrypt)
+
+	/*
+	 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
+	 *			  struct ghash_key const *k, u8 ctr[],
+	 *			  int rounds)
+	 */
+ENTRY(pmull_gcm_decrypt)
+	pmull_gcm_do_crypt	0
+ENDPROC(pmull_gcm_decrypt)
+
+	/*
+	 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
+	 */
+ENTRY(pmull_gcm_encrypt_block)
+	cbz		x2, 0f
+	load_round_keys	w3, x2
+0:	ld1		{v0.16b}, [x1]
+	enc_block	v0, w3
+	st1		{v0.16b}, [x0]
 	ret
-ENDPROC(pmull_ghash_update)
+ENDPROC(pmull_gcm_encrypt_block)

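Both the p64 and p8 flavours of __pmull_ghash above rely on the same Karatsuba decomposition: one 128x128-bit carryless product is assembled from three 64x64 products, a1*b1, a0*b0 and (a1+a0)*(b1+b0), where addition is XOR in GF(2). A self-contained C model of that step, using a deliberately slow bitwise clmul64() in place of the PMULL instruction; the reduction modulo the GHASH polynomial (the __pmull_reduce_* macros) is intentionally left out:

#include <stdint.h>

struct u128 { uint64_t hi, lo; };

/* Bitwise 64x64 -> 128 carryless multiply; models a single PMULL. */
static struct u128 clmul64(uint64_t a, uint64_t b)
{
	struct u128 r = { 0, 0 };
	int i;

	for (i = 0; i < 64; i++)
		if (b & (1ULL << i)) {
			r.lo ^= a << i;
			if (i)
				r.hi ^= a >> (64 - i);
		}
	return r;
}

/* 128x128 -> 256 via Karatsuba: out[1]:out[0] = (a1:a0) * (b1:b0). */
static void clmul128(uint64_t a1, uint64_t a0, uint64_t b1, uint64_t b0,
		     struct u128 out[2])
{
	struct u128 hi  = clmul64(a1, b1);		/* a1 * b1 */
	struct u128 lo  = clmul64(a0, b0);		/* a0 * b0 */
	struct u128 mid = clmul64(a1 ^ a0, b1 ^ b0);	/* (a1+a0)(b1+b0) */

	/* mid holds the cross terms once hi and lo are XORed back out. */
	mid.hi ^= hi.hi ^ lo.hi;
	mid.lo ^= hi.lo ^ lo.lo;

	/* Fold the middle term in at bit offset 64. */
	out[0] = lo;
	out[1] = hi;
	out[0].hi ^= mid.lo;
	out[1].lo ^= mid.hi;
}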
+ 482 - 35
arch/arm64/crypto/ghash-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
  *
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -9,22 +9,33 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("ghash");
 
 #define GHASH_BLOCK_SIZE	16
 #define GHASH_DIGEST_SIZE	16
+#define GCM_IV_SIZE		12
 
 struct ghash_key {
 	u64 a;
 	u64 b;
+	be128 k;
 };
 
 struct ghash_desc_ctx {
@@ -33,8 +44,35 @@ struct ghash_desc_ctx {
 	u32 count;
 };
 
-asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
-				   struct ghash_key const *k, const char *head);
+struct gcm_aes_ctx {
+	struct crypto_aes_ctx	aes_key;
+	struct ghash_key	ghash_key;
+};
+
+asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
+				       struct ghash_key const *k,
+				       const char *head);
+
+asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
+				      struct ghash_key const *k,
+				      const char *head);
+
+static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
+				  struct ghash_key const *k,
+				  const char *head);
+
+asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds, u8 ks[]);
+
+asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
+				  const u8 src[], struct ghash_key const *k,
+				  u8 ctr[], int rounds);
+
+asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
+					u32 const rk[], int rounds);
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
 
 static int ghash_init(struct shash_desc *desc)
 {
@@ -44,6 +82,36 @@ static int ghash_init(struct shash_desc *desc)
 	return 0;
 }
 
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+			    struct ghash_key *key, const char *head)
+{
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, dg, src, key, head);
+		kernel_neon_end();
+	} else {
+		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+		do {
+			const u8 *in = src;
+
+			if (head) {
+				in = head;
+				blocks++;
+				head = NULL;
+			} else {
+				src += GHASH_BLOCK_SIZE;
+			}
+
+			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+			gf128mul_lle(&dst, &key->k);
+		} while (--blocks);
+
+		dg[0] = be64_to_cpu(dst.b);
+		dg[1] = be64_to_cpu(dst.a);
+	}
+}
+
 static int ghash_update(struct shash_desc *desc, const u8 *src,
 			unsigned int len)
 {
@@ -67,10 +135,9 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 		blocks = len / GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(blocks, ctx->digest, src, key,
-				   partial ? ctx->buf : NULL);
-		kernel_neon_end();
+		ghash_do_update(blocks, ctx->digest, src, key,
+				partial ? ctx->buf : NULL);
+
 		src += blocks * GHASH_BLOCK_SIZE;
 		partial = 0;
 	}
@@ -89,9 +156,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-		kernel_neon_begin_partial(8);
-		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
-		kernel_neon_end();
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -100,16 +165,13 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 	return 0;
 }
 
-static int ghash_setkey(struct crypto_shash *tfm,
-			const u8 *inkey, unsigned int keylen)
+static int __ghash_setkey(struct ghash_key *key,
+			  const u8 *inkey, unsigned int keylen)
 {
-	struct ghash_key *key = crypto_shash_ctx(tfm);
 	u64 a, b;
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
+	/* needed for the fallback */
+	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
 
 	/* perform multiplication by 'x' in GF(2^128) */
 	b = get_unaligned_be64(inkey);
@@ -124,33 +186,418 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	return 0;
 }
 
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *inkey, unsigned int keylen)
+{
+	struct ghash_key *key = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return __ghash_setkey(key, inkey, keylen);
+}
+
 static struct shash_alg ghash_alg = {
-	.digestsize	= GHASH_DIGEST_SIZE,
-	.init		= ghash_init,
-	.update		= ghash_update,
-	.final		= ghash_final,
-	.setkey		= ghash_setkey,
-	.descsize	= sizeof(struct ghash_desc_ctx),
-	.base		= {
-		.cra_name		= "ghash",
-		.cra_driver_name	= "ghash-ce",
-		.cra_priority		= 200,
-		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize		= GHASH_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct ghash_key),
-		.cra_module		= THIS_MODULE,
-	},
+	.base.cra_name		= "ghash",
+	.base.cra_driver_name	= "ghash-ce",
+	.base.cra_priority	= 200,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= GHASH_DIGEST_SIZE,
+	.init			= ghash_init,
+	.update			= ghash_update,
+	.final			= ghash_final,
+	.setkey			= ghash_setkey,
+	.descsize		= sizeof(struct ghash_desc_ctx),
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+	/*
+	 * # of rounds specified by AES:
+	 * 128 bit key		10 rounds
+	 * 192 bit key		12 rounds
+	 * 256 bit key		14 rounds
+	 * => n byte key	=> 6 + (n/4) rounds
+	 */
+	return 6 + ctx->key_length / 4;
+}
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
+		      unsigned int keylen)
+{
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	u8 key[GHASH_BLOCK_SIZE];
+	int ret;
+
+	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+	if (ret) {
+		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
+			    num_rounds(&ctx->aes_key));
+
+	return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
+}
+
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12 ... 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
+			   int *buf_count, struct gcm_aes_ctx *ctx)
+{
+	if (*buf_count > 0) {
+		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);
+
+		memcpy(&buf[*buf_count], src, buf_added);
+
+		*buf_count += buf_added;
+		src += buf_added;
+		count -= buf_added;
+	}
+
+	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
+		int blocks = count / GHASH_BLOCK_SIZE;
+
+		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
+				*buf_count ? buf : NULL);
+
+		src += blocks * GHASH_BLOCK_SIZE;
+		count %= GHASH_BLOCK_SIZE;
+		*buf_count = 0;
+	}
+
+	if (count > 0) {
+		memcpy(buf, src, count);
+		*buf_count = count;
+	}
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	u8 buf[GHASH_BLOCK_SIZE];
+	struct scatter_walk walk;
+	u32 len = req->assoclen;
+	int buf_count = 0;
+
+	scatterwalk_start(&walk, req->src);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, len);
+		u8 *p;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		p = scatterwalk_map(&walk);
+
+		gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
+		len -= n;
+
+		scatterwalk_unmap(p);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	} while (len);
+
+	if (buf_count) {
+		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+	}
+}
+
+static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
+		      u64 dg[], u8 tag[], int cryptlen)
+{
+	u8 mac[AES_BLOCK_SIZE];
+	u128 lengths;
+
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64(cryptlen * 8);
+
+	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+
+	put_unaligned_be64(dg[1], mac);
+	put_unaligned_be64(dg[0], mac + 8);
+
+	crypto_xor(tag, mac, AES_BLOCK_SIZE);
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 ks[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		pmull_gcm_encrypt_block(ks, iv, NULL,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key), ks);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    ks, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+					walk.dst.virt.addr, &ctx->ghash_key,
+					NULL);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		u8 buf[GHASH_BLOCK_SIZE];
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
+			       walk.nbytes);
+
+		memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen);
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct skcipher_walk walk;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 tag[AES_BLOCK_SIZE];
+	u8 buf[GHASH_BLOCK_SIZE];
+	u64 dg[2] = {};
+	int err;
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, dg);
+
+	memcpy(iv, req->iv, GCM_IV_SIZE);
+	put_unaligned_be32(1, iv + GCM_IV_SIZE);
+
+	if (likely(may_use_simd())) {
+		kernel_neon_begin();
+
+		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
+					num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+
+			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
+					  walk.src.virt.addr, &ctx->ghash_key,
+					  iv, num_rounds(&ctx->aes_key));
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			pmull_gcm_encrypt_block(iv, iv, NULL,
+						num_rounds(&ctx->aes_key));
+
+		kernel_neon_end();
+	} else {
+		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
+				    num_rounds(&ctx->aes_key));
+		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+		err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+		while (walk.nbytes >= AES_BLOCK_SIZE) {
+			int blocks = walk.nbytes / AES_BLOCK_SIZE;
+			u8 *dst = walk.dst.virt.addr;
+			u8 *src = walk.src.virt.addr;
+
+			ghash_do_update(blocks, dg, walk.src.virt.addr,
+					&ctx->ghash_key, NULL);
+
+			do {
+				__aes_arm64_encrypt(ctx->aes_key.key_enc,
+						    buf, iv,
+						    num_rounds(&ctx->aes_key));
+				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
+				crypto_inc(iv, AES_BLOCK_SIZE);
+
+				dst += AES_BLOCK_SIZE;
+				src += AES_BLOCK_SIZE;
+			} while (--blocks > 0);
+
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (walk.nbytes)
+			__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
+					    num_rounds(&ctx->aes_key));
+	}
+
+	/* handle the tail */
+	if (walk.nbytes) {
+		memcpy(buf, walk.src.virt.addr, walk.nbytes);
+		memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
+			       walk.nbytes);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		return err;
+
+	gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(buf, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(tag, buf, authsize))
+		return -EBADMSG;
+	return 0;
+}
+
+static struct aead_alg gcm_aes_alg = {
+	.ivsize			= GCM_IV_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.maxauthsize		= AES_BLOCK_SIZE,
+	.setkey			= gcm_setkey,
+	.setauthsize		= gcm_setauthsize,
+	.encrypt		= gcm_encrypt,
+	.decrypt		= gcm_decrypt,
+
+	.base.cra_name		= "gcm(aes)",
+	.base.cra_driver_name	= "gcm-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct gcm_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
 };
 
 static int __init ghash_ce_mod_init(void)
 {
-	return crypto_register_shash(&ghash_alg);
+	int ret;
+
+	if (!(elf_hwcap & HWCAP_ASIMD))
+		return -ENODEV;
+
+	if (elf_hwcap & HWCAP_PMULL)
+		pmull_ghash_update = pmull_ghash_update_p64;
+
+	else
+		pmull_ghash_update = pmull_ghash_update_p8;
+
+	ret = crypto_register_shash(&ghash_alg);
+	if (ret)
+		return ret;
+
+	if (elf_hwcap & HWCAP_PMULL) {
+		ret = crypto_register_aead(&gcm_aes_alg);
+		if (ret)
+			crypto_unregister_shash(&ghash_alg);
+	}
+	return ret;
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
 	crypto_unregister_shash(&ghash_alg);
+	crypto_unregister_aead(&gcm_aes_alg);
 }
 
-module_cpu_feature_match(PMULL, ghash_ce_mod_init);
+static const struct cpu_feature ghash_cpu_feature[] = {
+	{ cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
+
+module_init(ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);

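One detail of gcm_encrypt()/gcm_decrypt() above that is easy to miss: with a 96-bit IV, GCM counter blocks are nonce || 32-bit big-endian counter, where counter value 1 yields the keystream block that masks the final tag and value 2 is the first block applied to the data. That is why the glue code writes put_unaligned_be32(1, iv + GCM_IV_SIZE) and then bumps the counter to 2 (and 3) before walking the plaintext. A short sketch of just that layout:

#include <stdint.h>
#include <string.h>

#define GCM_IV_SIZE	12
#define AES_BLOCK_SIZE	16

static void put_be32(uint32_t v, uint8_t *p)
{
	p[0] = v >> 24;
	p[1] = v >> 16;
	p[2] = v >> 8;
	p[3] = v;
}

/* First two GCM counter blocks from a 96-bit nonce: E(K, j0) masks
 * the authentication tag, ctr1 encrypts the first data block. */
static void gcm_first_counters(const uint8_t nonce[GCM_IV_SIZE],
			       uint8_t j0[AES_BLOCK_SIZE],
			       uint8_t ctr1[AES_BLOCK_SIZE])
{
	memcpy(j0, nonce, GCM_IV_SIZE);
	put_be32(1, j0 + GCM_IV_SIZE);

	memcpy(ctr1, nonce, GCM_IV_SIZE);
	put_be32(2, ctr1 + GCM_IV_SIZE);
}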
+ 14 - 4
arch/arm64/crypto/sha1-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -37,8 +38,11 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return crypto_sha1_update(desc, data, len);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
@@ -52,13 +56,16 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, data, len, out);
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_update(desc, data, len,
 			    (sha1_block_fn *)sha1_ce_transform);
 	if (!finalize)
@@ -71,8 +78,11 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return crypto_sha1_finup(desc, NULL, 0, out);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(16);
+	kernel_neon_begin();
 	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
 	kernel_neon_end();
 	return sha1_base_finish(desc, out);

+ 26 - 4
arch/arm64/crypto/sha2-ce-glue.c

@@ -1,7 +1,7 @@
 /*
  * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
@@ -34,13 +35,19 @@ const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
 const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
 						 finalize);
 
+asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd())
+		return sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
@@ -54,13 +61,22 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
+	if (!may_use_simd()) {
+		if (len)
+			sha256_base_do_update(desc, data, len,
+				(sha256_block_fn *)sha256_block_data_order);
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
 	 */
 	sctx->finalize = finalize;
 
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha2_ce_transform);
 	if (!finalize)
@@ -74,8 +90,14 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
 {
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 
+	if (!may_use_simd()) {
+		sha256_base_do_finalize(desc,
+				(sha256_block_fn *)sha256_block_data_order);
+		return sha256_base_finish(desc, out);
+	}
+
 	sctx->finalize = 0;
-	kernel_neon_begin_partial(28);
+	kernel_neon_begin();
 	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
 	kernel_neon_end();
 	return sha256_base_finish(desc, out);

+ 1 - 0
arch/arm64/crypto/sha256-glue.c

@@ -29,6 +29,7 @@ MODULE_ALIAS_CRYPTO("sha256");
 
 asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
 					unsigned int num_blks);
+EXPORT_SYMBOL(sha256_block_data_order);
 
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
 				  unsigned int num_blks);

+ 1 - 2
arch/sparc/crypto/aes_glue.c

@@ -344,8 +344,7 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
 
 	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
 			      keystream, AES_BLOCK_SIZE);
-	crypto_xor((u8 *) keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 

+ 2 - 2
arch/x86/crypto/aesni-intel_glue.c

@@ -475,8 +475,8 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	aesni_enc(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
+
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 

+ 1 - 2
arch/x86/crypto/blowfish_glue.c

@@ -271,8 +271,7 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
 	unsigned int nbytes = walk->nbytes;
 
 	blowfish_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, BF_BLOCK_SIZE);
 }

+ 1 - 2
arch/x86/crypto/cast5_avx_glue.c

@@ -256,8 +256,7 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
 	unsigned int nbytes = walk->nbytes;
 
 	__cast5_encrypt(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
 }

+ 1 - 2
arch/x86/crypto/des3_ede_glue.c

@@ -277,8 +277,7 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
 	unsigned int nbytes = walk->nbytes;
 
 	des3_ede_enc_blk(ctx, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
 }

+ 2 - 0
crypto/Kconfig

@@ -1753,6 +1753,8 @@ config CRYPTO_USER_API_AEAD
 	tristate "User-space interface for AEAD cipher algorithms"
 	depends on NET
 	select CRYPTO_AEAD
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_NULL
 	select CRYPTO_USER_API
 	help
 	  This option enables the user-space interface for AEAD

+ 691 - 0
crypto/af_alg.c

@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/rwsem.h>
+#include <linux/sched/signal.h>
 #include <linux/security.h>
 
 struct alg_type_list {
@@ -507,6 +508,696 @@ void af_alg_complete(struct crypto_async_request *req, int err)
 }
 EXPORT_SYMBOL_GPL(af_alg_complete);
 
+/**
+ * af_alg_alloc_tsgl - allocate the TX SGL
+ *
+ * @sk socket of connection to user space
+ * @return: 0 upon success, < 0 upon error
+ */
+int af_alg_alloc_tsgl(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct scatterlist *sg = NULL;
+
+	sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+	if (!list_empty(&ctx->tsgl_list))
+		sg = sgl->sg;
+
+	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
+		sgl = sock_kmalloc(sk, sizeof(*sgl) +
+				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+				   GFP_KERNEL);
+		if (!sgl)
+			return -ENOMEM;
+
+		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
+		sgl->cur = 0;
+
+		if (sg)
+			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
+
+		list_add_tail(&sgl->list, &ctx->tsgl_list);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_tsgl);
+
+/**
+ * af_alg_count_tsgl - Count number of TX SG entries
+ *
+ * The count covers the SG entries from the beginning of the SGL up to
+ * @bytes. If an offset is provided, counting starts at that offset.
+ *
+ * @sk socket of connection to user space
+ * @bytes Number of bytes whose SG entries shall be counted
+ * @offset Start counting SG entries at the given byte offset
+ * @return Number of TX SG entries found given the constraints
+ */
+unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl, *tmp;
+	unsigned int i;
+	unsigned int sgl_count = 0;
+
+	if (!bytes)
+		return 0;
+
+	list_for_each_entry_safe(sgl, tmp, &ctx->tsgl_list, list) {
+		struct scatterlist *sg = sgl->sg;
+
+		for (i = 0; i < sgl->cur; i++) {
+			size_t bytes_count;
+
+			/* Skip offset */
+			if (offset >= sg[i].length) {
+				offset -= sg[i].length;
+				bytes -= sg[i].length;
+				continue;
+			}
+
+			bytes_count = sg[i].length - offset;
+
+			offset = 0;
+			sgl_count++;
+
+			/* If we have seen the requested number of bytes, stop */
+			if (bytes_count >= bytes)
+				return sgl_count;
+
+			bytes -= bytes_count;
+		}
+	}
+
+	return sgl_count;
+}
+EXPORT_SYMBOL_GPL(af_alg_count_tsgl);
+
+/**
+ * af_alg_pull_tsgl - Release the specified buffers from the TX SGL
+ *
+ * If @dst is non-NULL, reassign the pages to @dst. The caller must release
+ * the pages. If @dst_offset is given, only reassign the pages to @dst
+ * starting at the @dst_offset (byte). The caller must ensure that @dst is
+ * large enough (e.g. by using af_alg_count_tsgl with the same offset).
+ *
+ * @sk socket of connection to user space
+ * @used Number of bytes to pull from the TX SGL
+ * @dst If non-NULL, the buffers are reassigned to this dst SGL instead of
+ *	being released. The caller must release the buffers in dst.
+ * @dst_offset Reassign the TX SGL starting at this offset. All buffers
+ *	       before the offset are released.
+ */
+void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
+		      size_t dst_offset)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct scatterlist *sg;
+	unsigned int i, j;
+
+	while (!list_empty(&ctx->tsgl_list)) {
+		sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl,
+				       list);
+		sg = sgl->sg;
+
+		for (i = 0, j = 0; i < sgl->cur; i++) {
+			size_t plen = min_t(size_t, used, sg[i].length);
+			struct page *page = sg_page(sg + i);
+
+			if (!page)
+				continue;
+
+			/*
+			 * Assumption: caller created af_alg_count_tsgl(len)
+			 * SG entries in dst.
+			 */
+			if (dst) {
+				if (dst_offset >= plen) {
+					/* discard page before offset */
+					dst_offset -= plen;
+				} else {
+					/* reassign page to dst after offset */
+					get_page(page);
+					sg_set_page(dst + j, page,
+						    plen - dst_offset,
+						    sg[i].offset + dst_offset);
+					dst_offset = 0;
+					j++;
+				}
+			}
+
+			sg[i].length -= plen;
+			sg[i].offset += plen;
+
+			used -= plen;
+			ctx->used -= plen;
+
+			if (sg[i].length)
+				return;
+
+			put_page(page);
+			sg_assign_page(sg + i, NULL);
+		}
+
+		list_del(&sgl->list);
+		sock_kfree_s(sk, sgl, sizeof(*sgl) + sizeof(sgl->sg[0]) *
+						     (MAX_SGL_ENTS + 1));
+	}
+
+	if (!ctx->used)
+		ctx->merge = 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_pull_tsgl);
+
+/**
+ * af_alg_free_areq_sgls - Release TX and RX SGLs of the request
+ *
+ * @areq Request holding the TX and RX SGL
+ */
+void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
+{
+	struct sock *sk = areq->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_rsgl *rsgl, *tmp;
+	struct scatterlist *tsgl;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
+		ctx->rcvused -= rsgl->sg_num_bytes;
+		af_alg_free_sg(&rsgl->sgl);
+		list_del(&rsgl->list);
+		if (rsgl != &areq->first_rsgl)
+			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+	}
+
+	tsgl = areq->tsgl;
+	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+		if (!sg_page(sg))
+			continue;
+		put_page(sg_page(sg));
+	}
+
+	if (areq->tsgl && areq->tsgl_entries)
+		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+}
+EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
+
+/**
+ * af_alg_wait_for_wmem - wait for availability of writable memory
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, return -EAGAIN instead of sleeping
+ * @return 0 when writable memory is available, < 0 upon error
+ */
+int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int err = -ERESTARTSYS;
+	long timeout;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, af_alg_writable(sk), &wait)) {
+			err = 0;
+			break;
+		}
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_wmem);
+
+/**
+ * af_alg_wmem_wakeup - wakeup caller when writable memory is available
+ *
+ * @sk socket of connection to user space
+ */
+void af_alg_wmem_wakeup(struct sock *sk)
+{
+	struct socket_wq *wq;
+
+	if (!af_alg_writable(sk))
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_wmem_wakeup);
+
+/**
+ * af_alg_wait_for_data - wait for availability of TX data
+ *
+ * @sk socket of connection to user space
+ * @flags If MSG_DONTWAIT is set, return -EAGAIN instead of sleeping
+ * @return 0 when TX data is available, < 0 upon error
+ */
+int af_alg_wait_for_data(struct sock *sk, unsigned flags)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	long timeout;
+	int err = -ERESTARTSYS;
+
+	if (flags & MSG_DONTWAIT)
+		return -EAGAIN;
+
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		if (signal_pending(current))
+			break;
+		timeout = MAX_SCHEDULE_TIMEOUT;
+		if (sk_wait_event(sk, &timeout, (ctx->used || !ctx->more),
+				  &wait)) {
+			err = 0;
+			break;
+		}
+	}
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(af_alg_wait_for_data);
+
+/**
+ * af_alg_data_wakeup - wakeup caller when new data can be sent to kernel
+ *
+ * @sk socket of connection to user space
+ */
+
+void af_alg_data_wakeup(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct socket_wq *wq;
+
+	if (!ctx->used)
+		return;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (skwq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+							   POLLRDNORM |
+							   POLLRDBAND);
+	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(af_alg_data_wakeup);
+
+/**
+ * af_alg_sendmsg - implementation of sendmsg system call handler
+ *
+ * The sendmsg system call handler obtains the user data and stores it
+ * in ctx->tsgl_list. This implies allocation of the required number of
+ * struct af_alg_tsgl.
+ *
+ * In addition, the ctx is filled with the information sent via CMSG.
+ *
+ * @sock socket of connection to user space
+ * @msg message from user space
+ * @size size of message from user space
+ * @ivsize the size of the IV for the cipher operation to verify that the
+ *	   user-space-provided IV has the right size
+ * @return number of bytes copied upon success, < 0 upon error
+ */
+int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		   unsigned int ivsize)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	struct af_alg_control con = {};
+	long copied = 0;
+	bool enc = 0;
+	bool init = 0;
+	int err = 0;
+
+	if (msg->msg_controllen) {
+		err = af_alg_cmsg_send(msg, &con);
+		if (err)
+			return err;
+
+		init = 1;
+		switch (con.op) {
+		case ALG_OP_ENCRYPT:
+			enc = 1;
+			break;
+		case ALG_OP_DECRYPT:
+			enc = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (con.iv && con.iv->ivlen != ivsize)
+			return -EINVAL;
+	}
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	if (init) {
+		ctx->enc = enc;
+		if (con.iv)
+			memcpy(ctx->iv, con.iv->iv, ivsize);
+
+		ctx->aead_assoclen = con.aead_assoclen;
+	}
+
+	while (size) {
+		struct scatterlist *sg;
+		size_t len = size;
+		size_t plen;
+
+		/* use the existing memory in an allocated page */
+		if (ctx->merge) {
+			sgl = list_entry(ctx->tsgl_list.prev,
+					 struct af_alg_tsgl, list);
+			sg = sgl->sg + sgl->cur - 1;
+			len = min_t(size_t, len,
+				    PAGE_SIZE - sg->offset - sg->length);
+
+			err = memcpy_from_msg(page_address(sg_page(sg)) +
+					      sg->offset + sg->length,
+					      msg, len);
+			if (err)
+				goto unlock;
+
+			sg->length += len;
+			ctx->merge = (sg->offset + sg->length) &
+				     (PAGE_SIZE - 1);
+
+			ctx->used += len;
+			copied += len;
+			size -= len;
+			continue;
+		}
+
+		if (!af_alg_writable(sk)) {
+			err = af_alg_wait_for_wmem(sk, msg->msg_flags);
+			if (err)
+				goto unlock;
+		}
+
+		/* allocate a new page */
+		len = min_t(unsigned long, len, af_alg_sndbuf(sk));
+
+		err = af_alg_alloc_tsgl(sk);
+		if (err)
+			goto unlock;
+
+		sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl,
+				 list);
+		sg = sgl->sg;
+		if (sgl->cur)
+			sg_unmark_end(sg + sgl->cur - 1);
+
+		do {
+			unsigned int i = sgl->cur;
+
+			plen = min_t(size_t, len, PAGE_SIZE);
+
+			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
+			if (!sg_page(sg + i)) {
+				err = -ENOMEM;
+				goto unlock;
+			}
+
+			err = memcpy_from_msg(page_address(sg_page(sg + i)),
+					      msg, plen);
+			if (err) {
+				__free_page(sg_page(sg + i));
+				sg_assign_page(sg + i, NULL);
+				goto unlock;
+			}
+
+			sg[i].length = plen;
+			len -= plen;
+			ctx->used += plen;
+			copied += plen;
+			size -= plen;
+			sgl->cur++;
+		} while (len && sgl->cur < MAX_SGL_ENTS);
+
+		if (!size)
+			sg_mark_end(sg + sgl->cur - 1);
+
+		ctx->merge = plen & (PAGE_SIZE - 1);
+	}
+
+	err = 0;
+
+	ctx->more = msg->msg_flags & MSG_MORE;
+
+unlock:
+	af_alg_data_wakeup(sk);
+	release_sock(sk);
+
+	return copied ?: err;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendmsg);
+
+/**
+ * af_alg_sendpage - sendpage system call handler
+ *
+ * This is a generic implementation of sendpage to fill ctx->tsgl_list.
+ */
+ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
+			int offset, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct af_alg_tsgl *sgl;
+	int err = -EINVAL;
+
+	if (flags & MSG_SENDPAGE_NOTLAST)
+		flags |= MSG_MORE;
+
+	lock_sock(sk);
+	if (!ctx->more && ctx->used)
+		goto unlock;
+
+	if (!size)
+		goto done;
+
+	if (!af_alg_writable(sk)) {
+		err = af_alg_wait_for_wmem(sk, flags);
+		if (err)
+			goto unlock;
+	}
+
+	err = af_alg_alloc_tsgl(sk);
+	if (err)
+		goto unlock;
+
+	ctx->merge = 0;
+	sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
+
+	if (sgl->cur)
+		sg_unmark_end(sgl->sg + sgl->cur - 1);
+
+	sg_mark_end(sgl->sg + sgl->cur);
+
+	get_page(page);
+	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+	sgl->cur++;
+	ctx->used += size;
+
+done:
+	ctx->more = flags & MSG_MORE;
+
+unlock:
+	af_alg_data_wakeup(sk);
+	release_sock(sk);
+
+	return err ?: size;
+}
+EXPORT_SYMBOL_GPL(af_alg_sendpage);
+
+/**
+ * af_alg_async_cb - AIO callback handler
+ *
+ * This handler cleans up the struct af_alg_async_req upon completion of the
+ * AIO operation.
+ *
+ * The number of bytes to be generated with the AIO operation must be set
+ * in areq->outlen before the AIO callback handler is invoked.
+ */
+void af_alg_async_cb(struct crypto_async_request *_req, int err)
+{
+	struct af_alg_async_req *areq = _req->data;
+	struct sock *sk = areq->sk;
+	struct kiocb *iocb = areq->iocb;
+	unsigned int resultlen;
+
+	lock_sock(sk);
+
+	/* Buffer size written by crypto operation. */
+	resultlen = areq->outlen;
+
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+	__sock_put(sk);
+
+	iocb->ki_complete(iocb, err ? err : resultlen, 0);
+
+	release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_async_cb);
+
+/**
+ * af_alg_poll - poll system call handler
+ */
+unsigned int af_alg_poll(struct file *file, struct socket *sock,
+			 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	unsigned int mask;
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
+
+	if (!ctx->more || ctx->used)
+		mask |= POLLIN | POLLRDNORM;
+
+	if (af_alg_writable(sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
+}
+EXPORT_SYMBOL_GPL(af_alg_poll);
+
+/**
+ * af_alg_alloc_areq - allocate struct af_alg_async_req
+ *
+ * @sk socket of connection to user space
+ * @areqlen size of struct af_alg_async_req + crypto_*_reqsize
+ * @return allocated data structure or ERR_PTR upon error
+ */
+struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
+					   unsigned int areqlen)
+{
+	struct af_alg_async_req *areq = sock_kmalloc(sk, areqlen, GFP_KERNEL);
+
+	if (unlikely(!areq))
+		return ERR_PTR(-ENOMEM);
+
+	areq->areqlen = areqlen;
+	areq->sk = sk;
+	areq->last_rsgl = NULL;
+	INIT_LIST_HEAD(&areq->rsgl_list);
+	areq->tsgl = NULL;
+	areq->tsgl_entries = 0;
+
+	return areq;
+}
+EXPORT_SYMBOL_GPL(af_alg_alloc_areq);
+
+/**
+ * af_alg_get_rsgl - create the RX SGL for the output data from the crypto
+ *		     operation
+ *
+ * @sk socket of connection to user space
+ * @msg user space message
+ * @flags flags used to invoke recvmsg with
+ * @areq instance of the cryptographic request that will hold the RX SGL
+ * @maxsize maximum number of bytes to be pulled from user space
+ * @outlen number of bytes in the RX SGL
+ * @return 0 on success, < 0 upon error
+ */
+int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
+		    struct af_alg_async_req *areq, size_t maxsize,
+		    size_t *outlen)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	size_t len = 0;
+
+	while (maxsize > len && msg_data_left(msg)) {
+		struct af_alg_rsgl *rsgl;
+		size_t seglen;
+		int err;
+
+		/* limit the number of readable buffers */
+		if (!af_alg_readable(sk))
+			break;
+
+		if (!ctx->used) {
+			err = af_alg_wait_for_data(sk, flags);
+			if (err)
+				return err;
+		}
+
+		seglen = min_t(size_t, (maxsize - len),
+			       msg_data_left(msg));
+
+		if (list_empty(&areq->rsgl_list)) {
+			rsgl = &areq->first_rsgl;
+		} else {
+			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
+			if (unlikely(!rsgl))
+				return -ENOMEM;
+		}
+
+		rsgl->sgl.npages = 0;
+		list_add_tail(&rsgl->list, &areq->rsgl_list);
+
+		/* make one iovec available as scatterlist */
+		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+		if (err < 0)
+			return err;
+
+		/* chain the new scatterlist with the previous one */
+		if (areq->last_rsgl)
+			af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
+
+		areq->last_rsgl = rsgl;
+		len += err;
+		ctx->rcvused += err;
+		rsgl->sg_num_bytes = err;
+		iov_iter_advance(&msg->msg_iter, err);
+	}
+
+	*outlen = len;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(af_alg_get_rsgl);
+
 static int __init af_alg_init(void)
 {
 	int err = proto_register(&alg_proto, 0);

+ 29 - 0
crypto/ahash.c

@@ -588,6 +588,35 @@ int crypto_unregister_ahash(struct ahash_alg *alg)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_ahash);
 
+int crypto_register_ahashes(struct ahash_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_ahash(&algs[i]);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (--i; i >= 0; --i)
+		crypto_unregister_ahash(&algs[i]);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_ahashes);
+
+void crypto_unregister_ahashes(struct ahash_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_ahash(&algs[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_ahashes);
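A hedged sketch of the intended consumer pattern — the algorithm array and function names below are hypothetical (the ARTPEC-6/7 driver added later in this series registers its hashes the same way):

/* Hypothetical driver code; my_ahash_algs is filled in elsewhere. */
static struct ahash_alg my_ahash_algs[2];

static int my_driver_register(void)
{
	/* Registers all entries, unwinding already-registered ones on error. */
	return crypto_register_ahashes(my_ahash_algs, ARRAY_SIZE(my_ahash_algs));
}

static void my_driver_unregister(void)
{
	crypto_unregister_ahashes(my_ahash_algs, ARRAY_SIZE(my_ahash_algs));
}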
+
 int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst)
 {

+ 15 - 10
crypto/algapi.c

@@ -975,13 +975,15 @@ void crypto_inc(u8 *a, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
+void __crypto_xor(u8 *dst, const u8 *src1, const u8 *src2, unsigned int len)
 {
 	int relalign = 0;
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
 		int size = sizeof(unsigned long);
-		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+		int d = (((unsigned long)dst ^ (unsigned long)src1) |
+			 ((unsigned long)dst ^ (unsigned long)src2)) &
+			(size - 1);
 
 		relalign = d ? 1 << __ffs(d) : size;
 
@@ -992,34 +994,37 @@ void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 		 * process the remainder of the input using optimal strides.
 		 */
 		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
-			*dst++ ^= *src++;
+			*dst++ = *src1++ ^ *src2++;
 			len--;
 		}
 	}
 
 	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
-		*(u64 *)dst ^= *(u64 *)src;
+		*(u64 *)dst = *(u64 *)src1 ^ *(u64 *)src2;
 		dst += 8;
-		src += 8;
+		src1 += 8;
+		src2 += 8;
 		len -= 8;
 	}
 
 	while (len >= 4 && !(relalign & 3)) {
-		*(u32 *)dst ^= *(u32 *)src;
+		*(u32 *)dst = *(u32 *)src1 ^ *(u32 *)src2;
 		dst += 4;
-		src += 4;
+		src1 += 4;
+		src2 += 4;
 		len -= 4;
 	}
 
 	while (len >= 2 && !(relalign & 1)) {
-		*(u16 *)dst ^= *(u16 *)src;
+		*(u16 *)dst = *(u16 *)src1 ^ *(u16 *)src2;
 		dst += 2;
-		src += 2;
+		src1 += 2;
+		src2 += 2;
 		len -= 2;
 	}
 
 	while (len--)
-		*dst++ ^= *src++;
+		*dst++ = *src1++ ^ *src2++;
 }
 EXPORT_SYMBOL_GPL(__crypto_xor);
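The ctr.c and pcbc.c hunks below fold their memcpy()+crypto_xor() pairs into this three-operand form, via the crypto_xor_cpy() wrapper that fronts __crypto_xor(). A before/after sketch of the pcbc.c case:

/* Before: two passes over the block. */
memcpy(iv, dst, bsize);
crypto_xor(iv, src, bsize);

/* After: one pass; dst may differ from both sources. */
crypto_xor_cpy(iv, dst, src, bsize);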
 

+ 250 - 614
crypto/algif_aead.c

@@ -5,88 +5,56 @@
  *
  * This file provides the user-space API for AEAD ciphers.
  *
- * This file is derived from algif_skcipher.c.
- *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
+ *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request is released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
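For orientation, a hedged user-space sketch of the TX/RX flow described above (error handling elided; "gcm(aes)", the key and the buffer sizes are arbitrary examples):

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

static int aead_roundtrip(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "aead",
		.salg_name   = "gcm(aes)",	/* arbitrary example */
	};
	char key[16] = { 0 }, buf[256] = { 0 };
	int tfmfd, opfd;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	setsockopt(tfmfd, SOL_ALG, ALG_SET_AEAD_AUTHSIZE, NULL, 16);
	opfd = accept(tfmfd, NULL, 0);

	/*
	 * send() only fills the TX SGL; the IV, the operation and the
	 * assoclen travel as sendmsg() cmsg headers (ALG_SET_OP,
	 * ALG_SET_IV, ALG_SET_AEAD_ASSOCLEN), omitted here.
	 */
	send(opfd, buf, 64, 0);

	/* read() supplies the RX SGL and triggers the crypto operation. */
	return read(opfd, buf, 64 + 16);
}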
 
 #include <crypto/internal/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/if_alg.h>
+#include <crypto/skcipher.h>
+#include <crypto/null.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct aead_sg_list {
-	unsigned int cur;
-	struct scatterlist sg[ALG_MAX_PAGES];
-};
-
-struct aead_async_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-};
-
-struct aead_async_req {
-	struct scatterlist *tsgl;
-	struct aead_async_rsgl first_rsgl;
-	struct list_head list;
-	struct kiocb *iocb;
-	struct sock *sk;
-	unsigned int tsgls;
-	char iv[];
-};
-
 struct aead_tfm {
 	struct crypto_aead *aead;
 	bool has_key;
+	struct crypto_skcipher *null_tfm;
 };
 
-struct aead_ctx {
-	struct aead_sg_list tsgl;
-	struct aead_async_rsgl first_rsgl;
-	struct list_head list;
-
-	void *iv;
-
-	struct af_alg_completion completion;
-
-	unsigned long used;
-
-	unsigned int len;
-	bool more;
-	bool merge;
-	bool enc;
-
-	size_t aead_assoclen;
-	struct aead_request aead_req;
-};
-
-static inline int aead_sndbuf(struct sock *sk)
+static inline bool aead_sufficient_data(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
-}
-
-static inline bool aead_writable(struct sock *sk)
-{
-	return PAGE_SIZE <= aead_sndbuf(sk);
-}
-
-static inline bool aead_sufficient_data(struct aead_ctx *ctx)
-{
-	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int as = crypto_aead_authsize(tfm);
 
 	/*
 	 * The minimum amount of memory needed for an AEAD cipher is
@@ -95,484 +63,58 @@ static inline bool aead_sufficient_data(struct aead_ctx *ctx)
 	return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as);
 }
 
-static void aead_reset_ctx(struct aead_ctx *ctx)
-{
-	struct aead_sg_list *sgl = &ctx->tsgl;
-
-	sg_init_table(sgl->sg, ALG_MAX_PAGES);
-	sgl->cur = 0;
-	ctx->used = 0;
-	ctx->more = 0;
-	ctx->merge = 0;
-}
-
-static void aead_put_sgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct scatterlist *sg = sgl->sg;
-	unsigned int i;
-
-	for (i = 0; i < sgl->cur; i++) {
-		if (!sg_page(sg + i))
-			continue;
-
-		put_page(sg_page(sg + i));
-		sg_assign_page(sg + i, NULL);
-	}
-	aead_reset_ctx(ctx);
-}
-
-static void aead_wmem_wakeup(struct sock *sk)
-{
-	struct socket_wq *wq;
-
-	if (!aead_writable(sk))
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	rcu_read_unlock();
-}
-
-static int aead_wait_for_data(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	long timeout;
-	int err = -ERESTARTSYS;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	return err;
-}
-
-static void aead_data_wakeup(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct socket_wq *wq;
-
-	if (ctx->more)
-		return;
-	if (!ctx->used)
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	rcu_read_unlock();
-}
-
 static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned ivsize =
-		crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct af_alg_control con = {};
-	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
-	int err = -EINVAL;
-
-	if (msg->msg_controllen) {
-		err = af_alg_cmsg_send(msg, &con);
-		if (err)
-			return err;
-
-		init = 1;
-		switch (con.op) {
-		case ALG_OP_ENCRYPT:
-			enc = 1;
-			break;
-		case ALG_OP_DECRYPT:
-			enc = 0;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (con.iv && con.iv->ivlen != ivsize)
-			return -EINVAL;
-	}
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (init) {
-		ctx->enc = enc;
-		if (con.iv)
-			memcpy(ctx->iv, con.iv->iv, ivsize);
-
-		ctx->aead_assoclen = con.aead_assoclen;
-	}
-
-	while (size) {
-		size_t len = size;
-		struct scatterlist *sg = NULL;
-
-		/* use the existing memory in an allocated page */
-		if (ctx->merge) {
-			sg = sgl->sg + sgl->cur - 1;
-			len = min_t(unsigned long, len,
-				    PAGE_SIZE - sg->offset - sg->length);
-			err = memcpy_from_msg(page_address(sg_page(sg)) +
-					      sg->offset + sg->length,
-					      msg, len);
-			if (err)
-				goto unlock;
-
-			sg->length += len;
-			ctx->merge = (sg->offset + sg->length) &
-				     (PAGE_SIZE - 1);
-
-			ctx->used += len;
-			copied += len;
-			size -= len;
-			continue;
-		}
-
-		if (!aead_writable(sk)) {
-			/* user space sent too much data */
-			aead_put_sgl(sk);
-			err = -EMSGSIZE;
-			goto unlock;
-		}
-
-		/* allocate a new page */
-		len = min_t(unsigned long, size, aead_sndbuf(sk));
-		while (len) {
-			size_t plen = 0;
-
-			if (sgl->cur >= ALG_MAX_PAGES) {
-				aead_put_sgl(sk);
-				err = -E2BIG;
-				goto unlock;
-			}
-
-			sg = sgl->sg + sgl->cur;
-			plen = min_t(size_t, len, PAGE_SIZE);
-
-			sg_assign_page(sg, alloc_page(GFP_KERNEL));
-			err = -ENOMEM;
-			if (!sg_page(sg))
-				goto unlock;
-
-			err = memcpy_from_msg(page_address(sg_page(sg)),
-					      msg, plen);
-			if (err) {
-				__free_page(sg_page(sg));
-				sg_assign_page(sg, NULL);
-				goto unlock;
-			}
-
-			sg->offset = 0;
-			sg->length = plen;
-			len -= plen;
-			ctx->used += plen;
-			copied += plen;
-			sgl->cur++;
-			size -= plen;
-			ctx->merge = plen & (PAGE_SIZE - 1);
-		}
-	}
-
-	err = 0;
-
-	ctx->more = msg->msg_flags & MSG_MORE;
-	if (!ctx->more && !aead_sufficient_data(ctx)) {
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-	}
-
-unlock:
-	aead_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: copied;
-}
-
-static ssize_t aead_sendpage(struct socket *sock, struct page *page,
-			     int offset, size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	int err = -EINVAL;
-
-	if (flags & MSG_SENDPAGE_NOTLAST)
-		flags |= MSG_MORE;
-
-	if (sgl->cur >= ALG_MAX_PAGES)
-		return -E2BIG;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (!size)
-		goto done;
-
-	if (!aead_writable(sk)) {
-		/* user space sent too much data */
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-		goto unlock;
-	}
-
-	ctx->merge = 0;
-
-	get_page(page);
-	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-	sgl->cur++;
-	ctx->used += size;
-
-	err = 0;
-
-done:
-	ctx->more = flags & MSG_MORE;
-	if (!ctx->more && !aead_sufficient_data(ctx)) {
-		aead_put_sgl(sk);
-		err = -EMSGSIZE;
-	}
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int ivsize = crypto_aead_ivsize(tfm);
 
-unlock:
-	aead_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: size;
+	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-#define GET_ASYM_REQ(req, tfm) (struct aead_async_req *) \
-		((char *)req + sizeof(struct aead_request) + \
-		 crypto_aead_reqsize(tfm))
-
-#define GET_REQ_SIZE(tfm) sizeof(struct aead_async_req) + \
-	crypto_aead_reqsize(tfm) + crypto_aead_ivsize(tfm) + \
-	sizeof(struct aead_request)
-
-static void aead_async_cb(struct crypto_async_request *_req, int err)
+static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
+				struct scatterlist *src,
+				struct scatterlist *dst, unsigned int len)
 {
-	struct aead_request *req = _req->data;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aead_async_req *areq = GET_ASYM_REQ(req, tfm);
-	struct sock *sk = areq->sk;
-	struct scatterlist *sg = areq->tsgl;
-	struct aead_async_rsgl *rsgl;
-	struct kiocb *iocb = areq->iocb;
-	unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-
-	list_for_each_entry(rsgl, &areq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
+	SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
 
-	for (i = 0; i < areq->tsgls; i++)
-		put_page(sg_page(sg + i));
+	skcipher_request_set_tfm(skreq, null_tfm);
+	skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      NULL, NULL);
+	skcipher_request_set_crypt(skreq, src, dst, len, NULL);
 
-	sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-	sock_kfree_s(sk, req, reqlen);
-	__sock_put(sk);
-	iocb->ki_complete(iocb, err, err);
+	return crypto_skcipher_encrypt(skreq);
 }
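An aside on the helper above: the SGL-to-SGL copy is implemented by "encrypting" with the default null cipher (ecb(cipher_null)), which aead_bind() below obtains via crypto_get_default_null_skcipher2(). This reuses the scatterwalk machinery instead of an open-coded page-by-page copy loop.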
 
-static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
-			      int flags)
+static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
+			 size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req);
-	struct aead_async_req *areq;
-	struct aead_request *req = NULL;
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct aead_async_rsgl *last_rsgl = NULL, *rsgl;
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	struct crypto_skcipher *null_tfm = aeadc->null_tfm;
 	unsigned int as = crypto_aead_authsize(tfm);
-	unsigned int i, reqlen = GET_REQ_SIZE(tfm);
-	int err = -ENOMEM;
-	unsigned long used;
-	size_t outlen = 0;
-	size_t usedpages = 0;
-
-	lock_sock(sk);
-	if (ctx->more) {
-		err = aead_wait_for_data(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	if (!aead_sufficient_data(ctx))
-		goto unlock;
-
-	used = ctx->used;
-	if (ctx->enc)
-		outlen = used + as;
-	else
-		outlen = used - as;
-
-	req = sock_kmalloc(sk, reqlen, GFP_KERNEL);
-	if (unlikely(!req))
-		goto unlock;
-
-	areq = GET_ASYM_REQ(req, tfm);
-	memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl));
-	INIT_LIST_HEAD(&areq->list);
-	areq->iocb = msg->msg_iocb;
-	areq->sk = sk;
-	memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm));
-	aead_request_set_tfm(req, tfm);
-	aead_request_set_ad(req, ctx->aead_assoclen);
-	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  aead_async_cb, req);
-	used -= ctx->aead_assoclen;
-
-	/* take over all tx sgls from ctx */
-	areq->tsgl = sock_kmalloc(sk,
-				  sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
-				  GFP_KERNEL);
-	if (unlikely(!areq->tsgl))
-		goto free;
-
-	sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
-	for (i = 0; i < sgl->cur; i++)
-		sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
-			    sgl->sg[i].length, sgl->sg[i].offset);
-
-	areq->tsgls = sgl->cur;
-
-	/* create rx sgls */
-	while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-		size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-				      (outlen - usedpages));
-
-		if (list_empty(&areq->list)) {
-			rsgl = &areq->first_rsgl;
-
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (unlikely(!rsgl)) {
-				err = -ENOMEM;
-				goto free;
-			}
-		}
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &areq->list);
-
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto free;
-
-		usedpages += err;
-
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
-
-		iov_iter_advance(&msg->msg_iter, err);
-	}
-
-	/* ensure output buffer is sufficiently large */
-	if (usedpages < outlen) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	aead_request_set_crypt(req, areq->tsgl, areq->first_rsgl.sgl.sg, used,
-			       areq->iv);
-	err = ctx->enc ? crypto_aead_encrypt(req) : crypto_aead_decrypt(req);
-	if (err) {
-		if (err == -EINPROGRESS) {
-			sock_hold(sk);
-			err = -EIOCBQUEUED;
-			aead_reset_ctx(ctx);
-			goto unlock;
-		} else if (err == -EBADMSG) {
-			aead_put_sgl(sk);
-		}
-		goto free;
-	}
-	aead_put_sgl(sk);
-
-free:
-	list_for_each_entry(rsgl, &areq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &areq->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
-	}
-	if (areq->tsgl)
-		sock_kfree_s(sk, areq->tsgl, sizeof(*areq->tsgl) * areq->tsgls);
-	if (req)
-		sock_kfree_s(sk, req, reqlen);
-unlock:
-	aead_wmem_wakeup(sk);
-	release_sock(sk);
-	return err ? err : outlen;
-}
-
-static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
-	struct aead_sg_list *sgl = &ctx->tsgl;
-	struct aead_async_rsgl *last_rsgl = NULL;
-	struct aead_async_rsgl *rsgl, *tmp;
-	int err = -EINVAL;
-	unsigned long used = 0;
-	size_t outlen = 0;
-	size_t usedpages = 0;
-
-	lock_sock(sk);
+	struct af_alg_async_req *areq;
+	struct af_alg_tsgl *tsgl;
+	struct scatterlist *src;
+	int err = 0;
+	size_t used = 0;		/* [in]  TX bufs to be en/decrypted */
+	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
+	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
+	size_t processed = 0;		/* [in]  TX bufs to be consumed */
 
 	/*
-	 * Please see documentation of aead_request_set_crypt for the
-	 * description of the AEAD memory structure expected from the caller.
+	 * Data length provided by caller via sendmsg/sendpage that has not
+	 * yet been processed.
 	 */
-
-	if (ctx->more) {
-		err = aead_wait_for_data(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	/* data length provided by caller via sendmsg/sendpage */
 	used = ctx->used;
 
 	/*
@@ -584,8 +126,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 	 * the error message in sendmsg/sendpage and still call recvmsg. This
 	 * check here protects the kernel integrity.
 	 */
-	if (!aead_sufficient_data(ctx))
-		goto unlock;
+	if (!aead_sufficient_data(sk))
+		return -EINVAL;
 
 	/*
 	 * Calculate the minimum output buffer size holding the result of the
@@ -606,104 +148,191 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
 	 */
 	used -= ctx->aead_assoclen;
 
-	/* convert iovecs of output buffers into scatterlists */
-	while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
-		size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
-				      (outlen - usedpages));
-
-		if (list_empty(&ctx->list)) {
-			rsgl = &ctx->first_rsgl;
-		} else {
-			rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
-			if (unlikely(!rsgl)) {
-				err = -ENOMEM;
-				goto unlock;
-			}
-		}
-		rsgl->sgl.npages = 0;
-		list_add_tail(&rsgl->list, &ctx->list);
+	/* Allocate cipher request for current operation. */
+	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+				     crypto_aead_reqsize(tfm));
+	if (IS_ERR(areq))
+		return PTR_ERR(areq);
 
-		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
-		if (err < 0)
-			goto unlock;
-		usedpages += err;
-		/* chain the new scatterlist with previous one */
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
+	/* convert iovecs of output buffers into RX SGL */
+	err = af_alg_get_rsgl(sk, msg, flags, areq, outlen, &usedpages);
+	if (err)
+		goto free;
 
-		last_rsgl = rsgl;
+	/*
+	 * Ensure the output buffer is sufficiently large. If the caller
+	 * provides less buffer space, only use the correspondingly smaller
+	 * amount of input data. This allows AIO operation where the caller
+	 * submitted all data to be processed up front and each AIO request
+	 * operates on a different chunk of the input data.
+	 */
+	if (usedpages < outlen) {
+		size_t less = outlen - usedpages;
 
-		iov_iter_advance(&msg->msg_iter, err);
+		if (used < less) {
+			err = -EINVAL;
+			goto free;
+		}
+		used -= less;
+		outlen -= less;
 	}
 
-	/* ensure output buffer is sufficiently large */
-	if (usedpages < outlen) {
-		err = -EINVAL;
-		goto unlock;
-	}
+	processed = used + ctx->aead_assoclen;
+	tsgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list);
 
-	sg_mark_end(sgl->sg + sgl->cur - 1);
-	aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->first_rsgl.sgl.sg,
-			       used, ctx->iv);
-	aead_request_set_ad(&ctx->aead_req, ctx->aead_assoclen);
+	/*
+	 * Copy of AAD from source to destination
+	 *
+	 * The AAD is copied to the destination buffer without change. Even
+	 * when user space performs an in-place cipher operation, the kernel
+	 * copies the data because it cannot detect whether such an in-place
+	 * operation was requested.
+	 *
+	 * For efficiency, the implementation below ensures that the ciphers
+	 * are invoked to perform the crypto operation in-place. This is
+	 * achieved by the memory management outlined below.
+	 */
 
-	err = af_alg_wait_for_completion(ctx->enc ?
-					 crypto_aead_encrypt(&ctx->aead_req) :
-					 crypto_aead_decrypt(&ctx->aead_req),
-					 &ctx->completion);
+	/* Use the RX SGL as source (and destination) for crypto op. */
+	src = areq->first_rsgl.sgl.sg;
+
+	if (ctx->enc) {
+		/*
+		 * Encryption operation - The in-place cipher operation is
+		 * achieved by the following operation:
+		 *
+		 * TX SGL: AAD || PT
+		 *	    |	   |
+		 *	    | copy |
+		 *	    v	   v
+		 * RX SGL: AAD || PT || Tag
+		 */
+		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+					   areq->first_rsgl.sgl.sg, processed);
+		if (err)
+			goto free;
+		af_alg_pull_tsgl(sk, processed, NULL, 0);
+	} else {
+		/*
+		 * Decryption operation - To achieve an in-place cipher
+		 * operation, the following SGL structure is used:
+		 *
+		 * TX SGL: AAD || CT || Tag
+		 *	    |	   |	 ^
+		 *	    | copy |	 | Create SGL link.
+		 *	    v	   v	 |
+		 * RX SGL: AAD || CT ----+
+		 */
+
+		/* Copy AAD || CT to RX SGL buffer for in-place operation. */
+		err = crypto_aead_copy_sgl(null_tfm, tsgl->sg,
+					   areq->first_rsgl.sgl.sg, outlen);
+		if (err)
+			goto free;
 
-	if (err) {
-		/* EBADMSG implies a valid cipher operation took place */
-		if (err == -EBADMSG)
-			aead_put_sgl(sk);
+		/* Create TX SGL for tag and chain it to RX SGL. */
+		areq->tsgl_entries = af_alg_count_tsgl(sk, processed,
+						       processed - as);
+		if (!areq->tsgl_entries)
+			areq->tsgl_entries = 1;
+		areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) *
+					      areq->tsgl_entries,
+					  GFP_KERNEL);
+		if (!areq->tsgl) {
+			err = -ENOMEM;
+			goto free;
+		}
+		sg_init_table(areq->tsgl, areq->tsgl_entries);
+
+		/*
+		 * Release the TX SGL, except for the tag data, which is
+		 * reassigned to areq->tsgl.
+		 */
+		af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as);
+
+		/* chain the areq TX SGL holding the tag with RX SGL */
+		if (usedpages) {
+			/* RX SGL present */
+			struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
+
+			sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
+			sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
+				 areq->tsgl);
+		} else {
+			/* no RX SGL present (e.g. authentication only) */
+			src = areq->tsgl;
+		}
+	}
 
-		goto unlock;
+	/* Initialize the crypto operation */
+	aead_request_set_crypt(&areq->cra_u.aead_req, src,
+			       areq->first_rsgl.sgl.sg, used, ctx->iv);
+	aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
+	aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
+
+	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+		/* AIO operation */
+		areq->iocb = msg->msg_iocb;
+		aead_request_set_callback(&areq->cra_u.aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  af_alg_async_cb, areq);
+		err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
+				 crypto_aead_decrypt(&areq->cra_u.aead_req);
+	} else {
+		/* Synchronous operation */
+		aead_request_set_callback(&areq->cra_u.aead_req,
+					  CRYPTO_TFM_REQ_MAY_BACKLOG,
+					  af_alg_complete, &ctx->completion);
+		err = af_alg_wait_for_completion(ctx->enc ?
+				crypto_aead_encrypt(&areq->cra_u.aead_req) :
+				crypto_aead_decrypt(&areq->cra_u.aead_req),
+						 &ctx->completion);
 	}
 
-	aead_put_sgl(sk);
-	err = 0;
+	/* AIO operation in progress */
+	if (err == -EINPROGRESS) {
+		sock_hold(sk);
 
-unlock:
-	list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		list_del(&rsgl->list);
-		if (rsgl != &ctx->first_rsgl)
-			sock_kfree_s(sk, rsgl, sizeof(*rsgl));
+		/* Remember output size that will be generated. */
+		areq->outlen = outlen;
+
+		return -EIOCBQUEUED;
 	}
-	INIT_LIST_HEAD(&ctx->list);
-	aead_wmem_wakeup(sk);
-	release_sock(sk);
 
-	return err ? err : outlen;
-}
+free:
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
 
-static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
-			int flags)
-{
-	return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-		aead_recvmsg_async(sock, msg, flags) :
-		aead_recvmsg_sync(sock, msg, flags);
+	return err ? err : outlen;
 }
 
-static unsigned int aead_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
+static int aead_recvmsg(struct socket *sock, struct msghdr *msg,
+			size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned int mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	int ret = 0;
 
-	if (!ctx->more)
-		mask |= POLLIN | POLLRDNORM;
+	lock_sock(sk);
+	while (msg_data_left(msg)) {
+		int err = _aead_recvmsg(sock, msg, ignored, flags);
+
+		/*
+		 * This check covers -EIOCBQUEUED, which implies that we can
+		 * only handle one AIO request per recvmsg invocation. A caller
+		 * that wants several AIO requests in flight must issue
+		 * separate AIO calls.
+		 *
+		 * Also return the error if no data has been processed so far.
+		 */
+		if (err <= 0) {
+			if (err == -EIOCBQUEUED || err == -EBADMSG || !ret)
+				ret = err;
+			goto out;
+		}
 
-	if (aead_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+		ret += err;
+	}
 
-	return mask;
+out:
+	af_alg_wmem_wakeup(sk);
+	release_sock(sk);
+	return ret;
 }
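The -EIOCBQUEUED path above is what AIO rides on. A hedged sketch of driving one asynchronous recvmsg from user space via the raw Linux AIO syscalls (libkcapi wraps the same mechanism; the helper name is illustrative):

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

/* One in-flight AIO read per recvmsg, per the comment in the loop above. */
static long aio_recv_one(int opfd, void *out, size_t outlen)
{
	aio_context_t aio_ctx = 0;
	struct iocb cb = {
		.aio_fildes     = opfd,
		.aio_lio_opcode = IOCB_CMD_PREAD,
		.aio_buf        = (unsigned long)out,
		.aio_nbytes     = outlen,
	};
	struct iocb *cbs[1] = { &cb };
	struct io_event ev;

	if (syscall(SYS_io_setup, 1, &aio_ctx) < 0)
		return -1;
	if (syscall(SYS_io_submit, aio_ctx, 1, cbs) != 1)
		return -1;
	/* Completion is signalled from af_alg_async_cb() via ki_complete(). */
	if (syscall(SYS_io_getevents, aio_ctx, 1, 1, &ev, NULL) != 1)
		return -1;
	syscall(SYS_io_destroy, aio_ctx);
	return ev.res;
}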
 
 static struct proto_ops algif_aead_ops = {
@@ -723,9 +352,9 @@ static struct proto_ops algif_aead_ops = {
 
 	.release	=	af_alg_release,
 	.sendmsg	=	aead_sendmsg,
-	.sendpage	=	aead_sendpage,
+	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	aead_recvmsg,
-	.poll		=	aead_poll,
+	.poll		=	af_alg_poll,
 };
 
 static int aead_check_key(struct socket *sock)
@@ -787,7 +416,7 @@ static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page,
 	if (err)
 		return err;
 
-	return aead_sendpage(sock, page, offset, size, flags);
+	return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -821,13 +450,14 @@ static struct proto_ops algif_aead_ops_nokey = {
 	.sendmsg	=	aead_sendmsg_nokey,
 	.sendpage	=	aead_sendpage_nokey,
 	.recvmsg	=	aead_recvmsg_nokey,
-	.poll		=	aead_poll,
+	.poll		=	af_alg_poll,
 };
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
 {
 	struct aead_tfm *tfm;
 	struct crypto_aead *aead;
+	struct crypto_skcipher *null_tfm;
 
 	tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
 	if (!tfm)
@@ -839,7 +469,15 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
 		return ERR_CAST(aead);
 	}
 
+	null_tfm = crypto_get_default_null_skcipher2();
+	if (IS_ERR(null_tfm)) {
+		crypto_free_aead(aead);
+		kfree(tfm);
+		return ERR_CAST(null_tfm);
+	}
+
 	tfm->aead = aead;
+	tfm->null_tfm = null_tfm;
 
 	return tfm;
 }
@@ -873,12 +511,15 @@ static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
 static void aead_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	unsigned int ivlen = crypto_aead_ivsize(
-				crypto_aead_reqtfm(&ctx->aead_req));
-
-	WARN_ON(refcount_read(&sk->sk_refcnt) != 0);
-	aead_put_sgl(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct aead_tfm *aeadc = pask->private;
+	struct crypto_aead *tfm = aeadc->aead;
+	unsigned int ivlen = crypto_aead_ivsize(tfm);
+
+	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
+	crypto_put_default_null_skcipher2();
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -886,11 +527,11 @@ static void aead_sock_destruct(struct sock *sk)
 
 static int aead_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct aead_ctx *ctx;
+	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
 	struct aead_tfm *tfm = private;
 	struct crypto_aead *aead = tfm->aead;
-	unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(aead);
+	unsigned int len = sizeof(*ctx);
 	unsigned int ivlen = crypto_aead_ivsize(aead);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
@@ -905,23 +546,18 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
 	}
 	memset(ctx->iv, 0, ivlen);
 
+	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
+	ctx->rcvused = 0;
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
-	ctx->tsgl.cur = 0;
 	ctx->aead_assoclen = 0;
 	af_alg_init_completion(&ctx->completion);
-	sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
-	INIT_LIST_HEAD(&ctx->list);
 
 	ask->private = ctx;
 
-	aead_request_set_tfm(&ctx->aead_req, aead);
-	aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  af_alg_complete, &ctx->completion);
-
 	sk->sk_destruct = aead_sock_destruct;
 
 	return 0;

+ 132 - 702
crypto/algif_skcipher.c

@@ -10,6 +10,21 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  *
+ * The following concept of the memory management is used:
+ *
+ * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
+ * filled by user space with the data submitted via sendpage/sendmsg. Filling
+ * up the TX SGL does not cause a crypto operation -- the data will only be
+ * tracked by the kernel. Upon receipt of one recvmsg call, the caller must
+ * provide a buffer which is tracked with the RX SGL.
+ *
+ * During the processing of the recvmsg operation, the cipher request is
+ * allocated and prepared. As part of the recvmsg operation, the processed
+ * TX buffers are extracted from the TX SGL into a separate SGL.
+ *
+ * After the completion of the crypto operation, the RX SGL and the cipher
+ * request is released. The extracted TX SGL parts are released together with
+ * the RX SGL release.
  */
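The same TX/RX split is visible from user space. A hedged sketch for a symmetric cipher — the "cbc(aes)" socket setup, cmsg handling and error checking are elided:

#include <unistd.h>
#include <sys/socket.h>

static long skcipher_stream(int opfd, const char *pt, char *ct, size_t len)
{
	/*
	 * send() only queues data into the TX SGL; with MSG_MORE the
	 * kernel keeps waiting for further input before the final chunk.
	 */
	send(opfd, pt, len / 2, MSG_MORE);
	send(opfd, pt + len / 2, len - len / 2, 0);

	/*
	 * The crypto operation runs only once recvmsg()/read() provides
	 * the RX SGL, i.e. the output buffer.
	 */
	return read(opfd, ct, len);
}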
 
 #include <crypto/scatterwalk.h>
@@ -18,284 +33,16 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/sched/signal.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <net/sock.h>
 
-struct skcipher_sg_list {
-	struct list_head list;
-
-	int cur;
-
-	struct scatterlist sg[0];
-};
-
 struct skcipher_tfm {
 	struct crypto_skcipher *skcipher;
 	bool has_key;
 };
 
-struct skcipher_ctx {
-	struct list_head tsgl;
-	struct af_alg_sgl rsgl;
-
-	void *iv;
-
-	struct af_alg_completion completion;
-
-	atomic_t inflight;
-	size_t used;
-
-	unsigned int len;
-	bool more;
-	bool merge;
-	bool enc;
-
-	struct skcipher_request req;
-};
-
-struct skcipher_async_rsgl {
-	struct af_alg_sgl sgl;
-	struct list_head list;
-};
-
-struct skcipher_async_req {
-	struct kiocb *iocb;
-	struct skcipher_async_rsgl first_sgl;
-	struct list_head list;
-	struct scatterlist *tsg;
-	atomic_t *inflight;
-	struct skcipher_request req;
-};
-
-#define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
-		      sizeof(struct scatterlist) - 1)
-
-static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
-{
-	struct skcipher_async_rsgl *rsgl, *tmp;
-	struct scatterlist *sgl;
-	struct scatterlist *sg;
-	int i, n;
-
-	list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) {
-		af_alg_free_sg(&rsgl->sgl);
-		if (rsgl != &sreq->first_sgl)
-			kfree(rsgl);
-	}
-	sgl = sreq->tsg;
-	n = sg_nents(sgl);
-	for_each_sg(sgl, sg, n, i) {
-		struct page *page = sg_page(sg);
-
-		/* some SGs may not have a page mapped */
-		if (page && page_ref_count(page))
-			put_page(page);
-	}
-
-	kfree(sreq->tsg);
-}
-
-static void skcipher_async_cb(struct crypto_async_request *req, int err)
-{
-	struct skcipher_async_req *sreq = req->data;
-	struct kiocb *iocb = sreq->iocb;
-
-	atomic_dec(sreq->inflight);
-	skcipher_free_async_sgls(sreq);
-	kzfree(sreq);
-	iocb->ki_complete(iocb, err, err);
-}
-
-static inline int skcipher_sndbuf(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-
-	return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
-			  ctx->used, 0);
-}
-
-static inline bool skcipher_writable(struct sock *sk)
-{
-	return PAGE_SIZE <= skcipher_sndbuf(sk);
-}
-
-static int skcipher_alloc_sgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg = NULL;
-
-	sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-	if (!list_empty(&ctx->tsgl))
-		sg = sgl->sg;
-
-	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
-		sgl = sock_kmalloc(sk, sizeof(*sgl) +
-				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
-				   GFP_KERNEL);
-		if (!sgl)
-			return -ENOMEM;
-
-		sg_init_table(sgl->sg, MAX_SGL_ENTS + 1);
-		sgl->cur = 0;
-
-		if (sg)
-			sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg);
-
-		list_add_tail(&sgl->list, &ctx->tsgl);
-	}
-
-	return 0;
-}
-
-static void skcipher_pull_sgl(struct sock *sk, size_t used, int put)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int i;
-
-	while (!list_empty(&ctx->tsgl)) {
-		sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list,
-				       list);
-		sg = sgl->sg;
-
-		for (i = 0; i < sgl->cur; i++) {
-			size_t plen = min_t(size_t, used, sg[i].length);
-
-			if (!sg_page(sg + i))
-				continue;
-
-			sg[i].length -= plen;
-			sg[i].offset += plen;
-
-			used -= plen;
-			ctx->used -= plen;
-
-			if (sg[i].length)
-				return;
-			if (put)
-				put_page(sg_page(sg + i));
-			sg_assign_page(sg + i, NULL);
-		}
-
-		list_del(&sgl->list);
-		sock_kfree_s(sk, sgl,
-			     sizeof(*sgl) + sizeof(sgl->sg[0]) *
-					    (MAX_SGL_ENTS + 1));
-	}
-
-	if (!ctx->used)
-		ctx->merge = 0;
-}
-
-static void skcipher_free_sgl(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-
-	skcipher_pull_sgl(sk, ctx->used, 1);
-}
-
-static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int err = -ERESTARTSYS;
-	long timeout;
-
-	if (flags & MSG_DONTWAIT)
-		return -EAGAIN;
-
-	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	return err;
-}
-
-static void skcipher_wmem_wakeup(struct sock *sk)
-{
-	struct socket_wq *wq;
-
-	if (!skcipher_writable(sk))
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	rcu_read_unlock();
-}
-
-static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	long timeout;
-	int err = -ERESTARTSYS;
-
-	if (flags & MSG_DONTWAIT) {
-		return -EAGAIN;
-	}
-
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	for (;;) {
-		if (signal_pending(current))
-			break;
-		timeout = MAX_SCHEDULE_TIMEOUT;
-		if (sk_wait_event(sk, &timeout, ctx->used, &wait)) {
-			err = 0;
-			break;
-		}
-	}
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
-	return err;
-}
-
-static void skcipher_data_wakeup(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct socket_wq *wq;
-
-	if (!ctx->used)
-		return;
-
-	rcu_read_lock();
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
-							   POLLRDNORM |
-							   POLLRDBAND);
-	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	rcu_read_unlock();
-}
-
 static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t size)
 {
@@ -303,445 +50,143 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
 	unsigned ivsize = crypto_skcipher_ivsize(tfm);
-	struct skcipher_sg_list *sgl;
-	struct af_alg_control con = {};
-	long copied = 0;
-	bool enc = 0;
-	bool init = 0;
-	int err;
-	int i;
-
-	if (msg->msg_controllen) {
-		err = af_alg_cmsg_send(msg, &con);
-		if (err)
-			return err;
-
-		init = 1;
-		switch (con.op) {
-		case ALG_OP_ENCRYPT:
-			enc = 1;
-			break;
-		case ALG_OP_DECRYPT:
-			enc = 0;
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		if (con.iv && con.iv->ivlen != ivsize)
-			return -EINVAL;
-	}
-
-	err = -EINVAL;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (init) {
-		ctx->enc = enc;
-		if (con.iv)
-			memcpy(ctx->iv, con.iv->iv, ivsize);
-	}
-
-	while (size) {
-		struct scatterlist *sg;
-		unsigned long len = size;
-		size_t plen;
-
-		if (ctx->merge) {
-			sgl = list_entry(ctx->tsgl.prev,
-					 struct skcipher_sg_list, list);
-			sg = sgl->sg + sgl->cur - 1;
-			len = min_t(unsigned long, len,
-				    PAGE_SIZE - sg->offset - sg->length);
-
-			err = memcpy_from_msg(page_address(sg_page(sg)) +
-					      sg->offset + sg->length,
-					      msg, len);
-			if (err)
-				goto unlock;
-
-			sg->length += len;
-			ctx->merge = (sg->offset + sg->length) &
-				     (PAGE_SIZE - 1);
-
-			ctx->used += len;
-			copied += len;
-			size -= len;
-			continue;
-		}
 
-		if (!skcipher_writable(sk)) {
-			err = skcipher_wait_for_wmem(sk, msg->msg_flags);
-			if (err)
-				goto unlock;
-		}
-
-		len = min_t(unsigned long, len, skcipher_sndbuf(sk));
-
-		err = skcipher_alloc_sgl(sk);
-		if (err)
-			goto unlock;
-
-		sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-		sg = sgl->sg;
-		if (sgl->cur)
-			sg_unmark_end(sg + sgl->cur - 1);
-		do {
-			i = sgl->cur;
-			plen = min_t(size_t, len, PAGE_SIZE);
-
-			sg_assign_page(sg + i, alloc_page(GFP_KERNEL));
-			err = -ENOMEM;
-			if (!sg_page(sg + i))
-				goto unlock;
-
-			err = memcpy_from_msg(page_address(sg_page(sg + i)),
-					      msg, plen);
-			if (err) {
-				__free_page(sg_page(sg + i));
-				sg_assign_page(sg + i, NULL);
-				goto unlock;
-			}
-
-			sg[i].length = plen;
-			len -= plen;
-			ctx->used += plen;
-			copied += plen;
-			size -= plen;
-			sgl->cur++;
-		} while (len && sgl->cur < MAX_SGL_ENTS);
-
-		if (!size)
-			sg_mark_end(sg + sgl->cur - 1);
-
-		ctx->merge = plen & (PAGE_SIZE - 1);
-	}
-
-	err = 0;
-
-	ctx->more = msg->msg_flags & MSG_MORE;
-
-unlock:
-	skcipher_data_wakeup(sk);
-	release_sock(sk);
-
-	return copied ?: err;
+	return af_alg_sendmsg(sock, msg, size, ivsize);
 }
 
-static ssize_t skcipher_sendpage(struct socket *sock, struct page *page,
-				 int offset, size_t size, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_sg_list *sgl;
-	int err = -EINVAL;
-
-	if (flags & MSG_SENDPAGE_NOTLAST)
-		flags |= MSG_MORE;
-
-	lock_sock(sk);
-	if (!ctx->more && ctx->used)
-		goto unlock;
-
-	if (!size)
-		goto done;
-
-	if (!skcipher_writable(sk)) {
-		err = skcipher_wait_for_wmem(sk, flags);
-		if (err)
-			goto unlock;
-	}
-
-	err = skcipher_alloc_sgl(sk);
-	if (err)
-		goto unlock;
-
-	ctx->merge = 0;
-	sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
-
-	if (sgl->cur)
-		sg_unmark_end(sgl->sg + sgl->cur - 1);
-
-	sg_mark_end(sgl->sg + sgl->cur);
-	get_page(page);
-	sg_set_page(sgl->sg + sgl->cur, page, size, offset);
-	sgl->cur++;
-	ctx->used += size;
-
-done:
-	ctx->more = flags & MSG_MORE;
-
-unlock:
-	skcipher_data_wakeup(sk);
-	release_sock(sk);
-
-	return err ?: size;
-}
-
-static int skcipher_all_sg_nents(struct skcipher_ctx *ctx)
-{
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int nents = 0;
-
-	list_for_each_entry(sgl, &ctx->tsgl, list) {
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		nents += sg_nents(sg);
-	}
-	return nents;
-}
-
-static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
-				  int flags)
+static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			     size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct sock *psk = ask->parent;
 	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
+	struct af_alg_ctx *ctx = ask->private;
 	struct skcipher_tfm *skc = pask->private;
 	struct crypto_skcipher *tfm = skc->skcipher;
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	struct skcipher_async_req *sreq;
-	struct skcipher_request *req;
-	struct skcipher_async_rsgl *last_rsgl = NULL;
-	unsigned int txbufs = 0, len = 0, tx_nents;
-	unsigned int reqsize = crypto_skcipher_reqsize(tfm);
-	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
-	int err = -ENOMEM;
-	bool mark = false;
-	char *iv;
-
-	sreq = kzalloc(sizeof(*sreq) + reqsize + ivsize, GFP_KERNEL);
-	if (unlikely(!sreq))
-		goto out;
-
-	req = &sreq->req;
-	iv = (char *)(req + 1) + reqsize;
-	sreq->iocb = msg->msg_iocb;
-	INIT_LIST_HEAD(&sreq->list);
-	sreq->inflight = &ctx->inflight;
+	unsigned int bs = crypto_skcipher_blocksize(tfm);
+	struct af_alg_async_req *areq;
+	int err = 0;
+	size_t len = 0;
 
-	lock_sock(sk);
-	tx_nents = skcipher_all_sg_nents(ctx);
-	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
-	if (unlikely(!sreq->tsg))
-		goto unlock;
-	sg_init_table(sreq->tsg, tx_nents);
-	memcpy(iv, ctx->iv, ivsize);
-	skcipher_request_set_tfm(req, tfm);
-	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
-				      skcipher_async_cb, sreq);
-
-	while (iov_iter_count(&msg->msg_iter)) {
-		struct skcipher_async_rsgl *rsgl;
-		int used;
-
-		if (!ctx->used) {
-			err = skcipher_wait_for_data(sk, flags);
-			if (err)
-				goto free;
-		}
-		sgl = list_first_entry(&ctx->tsgl,
-				       struct skcipher_sg_list, list);
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		used = min_t(unsigned long, ctx->used,
-			     iov_iter_count(&msg->msg_iter));
-		used = min_t(unsigned long, used, sg->length);
-
-		if (txbufs == tx_nents) {
-			struct scatterlist *tmp;
-			int x;
-			/* Ran out of tx slots in async request
-			 * need to expand */
-			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
-				      GFP_KERNEL);
-			if (!tmp) {
-				err = -ENOMEM;
-				goto free;
-			}
-
-			sg_init_table(tmp, tx_nents * 2);
-			for (x = 0; x < tx_nents; x++)
-				sg_set_page(&tmp[x], sg_page(&sreq->tsg[x]),
-					    sreq->tsg[x].length,
-					    sreq->tsg[x].offset);
-			kfree(sreq->tsg);
-			sreq->tsg = tmp;
-			tx_nents *= 2;
-			mark = true;
-		}
-		/* Need to take over the tx sgl from ctx
-		 * to the asynch req - these sgls will be freed later */
-		sg_set_page(sreq->tsg + txbufs++, sg_page(sg), sg->length,
-			    sg->offset);
-
-		if (list_empty(&sreq->list)) {
-			rsgl = &sreq->first_sgl;
-			list_add_tail(&rsgl->list, &sreq->list);
-		} else {
-			rsgl = kmalloc(sizeof(*rsgl), GFP_KERNEL);
-			if (!rsgl) {
-				err = -ENOMEM;
-				goto free;
-			}
-			list_add_tail(&rsgl->list, &sreq->list);
-		}
+	/* Allocate cipher request for current operation. */
+	areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
+				     crypto_skcipher_reqsize(tfm));
+	if (IS_ERR(areq))
+		return PTR_ERR(areq);
 
-		used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used);
-		err = used;
-		if (used < 0)
-			goto free;
-		if (last_rsgl)
-			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
-
-		last_rsgl = rsgl;
-		len += used;
-		skcipher_pull_sgl(sk, used, 0);
-		iov_iter_advance(&msg->msg_iter, used);
+	/* convert iovecs of output buffers into RX SGL */
+	err = af_alg_get_rsgl(sk, msg, flags, areq, -1, &len);
+	if (err)
+		goto free;
+
+	/* Process only as many RX buffers as we have TX data for. */
+	if (len > ctx->used)
+		len = ctx->used;
+
+	/*
+	 * If more data is expected to follow, only process multiples of the
+	 * full block size.
+	 */
+	if (ctx->more || len < ctx->used)
+		len -= len % bs;
+
+	/*
+	 * Create a per request TX SGL for this request which tracks the
+	 * SG entries from the global TX SGL.
+	 */
+	areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0);
+	if (!areq->tsgl_entries)
+		areq->tsgl_entries = 1;
+	areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * areq->tsgl_entries,
+				  GFP_KERNEL);
+	if (!areq->tsgl) {
+		err = -ENOMEM;
+		goto free;
+	}
+	sg_init_table(areq->tsgl, areq->tsgl_entries);
+	af_alg_pull_tsgl(sk, len, areq->tsgl, 0);
+
+	/* Initialize the crypto operation */
+	skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
+	skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
+				   areq->first_rsgl.sgl.sg, len, ctx->iv);
+
+	if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
+		/* AIO operation */
+		areq->iocb = msg->msg_iocb;
+		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+					      CRYPTO_TFM_REQ_MAY_SLEEP,
+					      af_alg_async_cb, areq);
+		err = ctx->enc ?
+			crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
+	} else {
+		/* Synchronous operation */
+		skcipher_request_set_callback(&areq->cra_u.skcipher_req,
+					      CRYPTO_TFM_REQ_MAY_SLEEP |
+					      CRYPTO_TFM_REQ_MAY_BACKLOG,
+					      af_alg_complete,
+					      &ctx->completion);
+		err = af_alg_wait_for_completion(ctx->enc ?
+			crypto_skcipher_encrypt(&areq->cra_u.skcipher_req) :
+			crypto_skcipher_decrypt(&areq->cra_u.skcipher_req),
+						 &ctx->completion);
 	}
 
-	if (mark)
-		sg_mark_end(sreq->tsg + txbufs - 1);
-
-	skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
-				   len, iv);
-	err = ctx->enc ? crypto_skcipher_encrypt(req) :
-			 crypto_skcipher_decrypt(req);
+	/* AIO operation in progress */
 	if (err == -EINPROGRESS) {
-		atomic_inc(&ctx->inflight);
-		err = -EIOCBQUEUED;
-		sreq = NULL;
-		goto unlock;
+		sock_hold(sk);
+
+		/* Remember output size that will be generated. */
+		areq->outlen = len;
+
+		return -EIOCBQUEUED;
 	}
+
 free:
-	skcipher_free_async_sgls(sreq);
-unlock:
-	skcipher_wmem_wakeup(sk);
-	release_sock(sk);
-	kzfree(sreq);
-out:
-	return err;
+	af_alg_free_areq_sgls(areq);
+	sock_kfree_s(sk, areq, areq->areqlen);
+
+	return err ? err : len;
 }
 
-static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
-				 int flags)
+static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t ignored, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct sock *psk = ask->parent;
-	struct alg_sock *pask = alg_sk(psk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct skcipher_tfm *skc = pask->private;
-	struct crypto_skcipher *tfm = skc->skcipher;
-	unsigned bs = crypto_skcipher_blocksize(tfm);
-	struct skcipher_sg_list *sgl;
-	struct scatterlist *sg;
-	int err = -EAGAIN;
-	int used;
-	long copied = 0;
+	int ret = 0;
 
 	lock_sock(sk);
 	while (msg_data_left(msg)) {
-		if (!ctx->used) {
-			err = skcipher_wait_for_data(sk, flags);
-			if (err)
-				goto unlock;
+		int err = _skcipher_recvmsg(sock, msg, ignored, flags);
+
+		/*
+		 * This check covers -EIOCBQUEUED, which implies that we can
+		 * only handle one AIO request per recvmsg invocation. A caller
+		 * that wants several AIO requests in flight must issue
+		 * separate AIO calls.
+		 *
+		 * Also return the error if no data has been processed so far.
+		 */
+		if (err <= 0) {
+			if (err == -EIOCBQUEUED || !ret)
+				ret = err;
+			goto out;
 		}
 
-		used = min_t(unsigned long, ctx->used, msg_data_left(msg));
-
-		used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used);
-		err = used;
-		if (err < 0)
-			goto unlock;
-
-		if (ctx->more || used < ctx->used)
-			used -= used % bs;
-
-		err = -EINVAL;
-		if (!used)
-			goto free;
-
-		sgl = list_first_entry(&ctx->tsgl,
-				       struct skcipher_sg_list, list);
-		sg = sgl->sg;
-
-		while (!sg->length)
-			sg++;
-
-		skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
-					   ctx->iv);
-
-		err = af_alg_wait_for_completion(
-				ctx->enc ?
-					crypto_skcipher_encrypt(&ctx->req) :
-					crypto_skcipher_decrypt(&ctx->req),
-				&ctx->completion);
-
-free:
-		af_alg_free_sg(&ctx->rsgl);
-
-		if (err)
-			goto unlock;
-
-		copied += used;
-		skcipher_pull_sgl(sk, used, 1);
-		iov_iter_advance(&msg->msg_iter, used);
+		ret += err;
 	}
 
-	err = 0;
-
-unlock:
-	skcipher_wmem_wakeup(sk);
+out:
+	af_alg_wmem_wakeup(sk);
 	release_sock(sk);
-
-	return copied ?: err;
-}
-
-static int skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
-			    size_t ignored, int flags)
-{
-	return (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) ?
-		skcipher_recvmsg_async(sock, msg, flags) :
-		skcipher_recvmsg_sync(sock, msg, flags);
+	return ret;
 }
 
-static unsigned int skcipher_poll(struct file *file, struct socket *sock,
-				  poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	unsigned int mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	if (ctx->used)
-		mask |= POLLIN | POLLRDNORM;
-
-	if (skcipher_writable(sk))
-		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
-
-	return mask;
-}
 
 static struct proto_ops algif_skcipher_ops = {
 	.family		=	PF_ALG,
@@ -760,9 +205,9 @@ static struct proto_ops algif_skcipher_ops = {
 
 	.release	=	af_alg_release,
 	.sendmsg	=	skcipher_sendmsg,
-	.sendpage	=	skcipher_sendpage,
+	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	skcipher_recvmsg,
-	.poll		=	skcipher_poll,
+	.poll		=	af_alg_poll,
 };
 
 static int skcipher_check_key(struct socket *sock)
@@ -824,7 +269,7 @@ static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
 	if (err)
 		return err;
 
-	return skcipher_sendpage(sock, page, offset, size, flags);
+	return af_alg_sendpage(sock, page, offset, size, flags);
 }
 
 static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
@@ -858,7 +303,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
-	.poll		=	skcipher_poll,
+	.poll		=	af_alg_poll,
 };
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
@@ -900,26 +345,16 @@ static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 	return err;
 }
 
-static void skcipher_wait(struct sock *sk)
-{
-	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	int ctr = 0;
-
-	while (atomic_read(&ctx->inflight) && ctr++ < 100)
-		msleep(100);
-}
-
 static void skcipher_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
-	struct skcipher_ctx *ctx = ask->private;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req);
-
-	if (atomic_read(&ctx->inflight))
-		skcipher_wait(sk);
+	struct af_alg_ctx *ctx = ask->private;
+	struct sock *psk = ask->parent;
+	struct alg_sock *pask = alg_sk(psk);
+	struct skcipher_tfm *skc = pask->private;
+	struct crypto_skcipher *tfm = skc->skcipher;
 
-	skcipher_free_sgl(sk);
+	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
 	sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm));
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
@@ -927,11 +362,11 @@ static void skcipher_sock_destruct(struct sock *sk)
 
 static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 {
-	struct skcipher_ctx *ctx;
+	struct af_alg_ctx *ctx;
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_tfm *tfm = private;
 	struct crypto_skcipher *skcipher = tfm->skcipher;
-	unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
+	unsigned int len = sizeof(*ctx);
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
@@ -946,22 +381,17 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
 
 	memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
 
-	INIT_LIST_HEAD(&ctx->tsgl);
+	INIT_LIST_HEAD(&ctx->tsgl_list);
 	ctx->len = len;
 	ctx->used = 0;
+	ctx->rcvused = 0;
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
-	atomic_set(&ctx->inflight, 0);
 	af_alg_init_completion(&ctx->completion);
 
 	ask->private = ctx;
 
-	skcipher_request_set_tfm(&ctx->req, skcipher);
-	skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_SLEEP |
-						 CRYPTO_TFM_REQ_MAY_BACKLOG,
-				      af_alg_complete, &ctx->completion);
-
 	sk->sk_destruct = skcipher_sock_destruct;
 
 	return 0;

+ 1 - 2
crypto/ctr.c

@@ -65,8 +65,7 @@ static void crypto_ctr_crypt_final(struct blkcipher_walk *walk,
 	unsigned int nbytes = walk->nbytes;
 
 	crypto_cipher_encrypt_one(tfm, keystream, ctrblk);
-	crypto_xor(keystream, src, nbytes);
-	memcpy(dst, keystream, nbytes);
+	crypto_xor_cpy(dst, keystream, src, nbytes);
 
 	crypto_inc(ctrblk, bsize);
 }

+ 33 - 18
crypto/ecdh.c

@@ -20,8 +20,6 @@ struct ecdh_ctx {
 	unsigned int curve_id;
 	unsigned int ndigits;
 	u64 private_key[ECC_MAX_DIGITS];
-	u64 public_key[2 * ECC_MAX_DIGITS];
-	u64 shared_secret[ECC_MAX_DIGITS];
 };
 
 static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
@@ -70,41 +68,58 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 
 static int ecdh_compute_value(struct kpp_request *req)
 {
-	int ret = 0;
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
-	size_t copied, nbytes;
+	u64 *public_key;
+	u64 *shared_secret = NULL;
 	void *buf;
+	size_t copied, nbytes, public_key_sz;
+	int ret = -ENOMEM;
 
 	nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	/* Public part is a point, thus it has both coordinates */
+	public_key_sz = 2 * nbytes;
+
+	public_key = kmalloc(public_key_sz, GFP_KERNEL);
+	if (!public_key)
+		return -ENOMEM;
 
 	if (req->src) {
-		copied = sg_copy_to_buffer(req->src, 1, ctx->public_key,
-					   2 * nbytes);
-		if (copied != 2 * nbytes)
-			return -EINVAL;
+		shared_secret = kmalloc(nbytes, GFP_KERNEL);
+		if (!shared_secret)
+			goto free_pubkey;
+
+		copied = sg_copy_to_buffer(req->src, 1, public_key,
+					   public_key_sz);
+		if (copied != public_key_sz) {
+			ret = -EINVAL;
+			goto free_all;
+		}
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
-						ctx->private_key,
-						ctx->public_key,
-						ctx->shared_secret);
+						ctx->private_key, public_key,
+						shared_secret);
 
-		buf = ctx->shared_secret;
+		buf = shared_secret;
 	} else {
 		ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
-				       ctx->private_key, ctx->public_key);
-		buf = ctx->public_key;
-		/* Public part is a point thus it has both coordinates */
-		nbytes *= 2;
+				       ctx->private_key, public_key);
+		buf = public_key;
+		nbytes = public_key_sz;
 	}
 
 	if (ret < 0)
-		return ret;
+		goto free_all;
 
 	copied = sg_copy_from_buffer(req->dst, 1, buf, nbytes);
 	if (copied != nbytes)
-		return -EINVAL;
+		ret = -EINVAL;
 
+	/* fall through */
+free_all:
+	kzfree(shared_secret);
+free_pubkey:
+	kfree(public_key);
 	return ret;
 }
 

+ 4 - 8
crypto/pcbc.c

@@ -55,8 +55,7 @@ static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
 	do {
 		crypto_xor(iv, src, bsize);
 		crypto_cipher_encrypt_one(tfm, dst, iv);
-		memcpy(iv, dst, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, dst, src, bsize);
 
 		src += bsize;
 		dst += bsize;
@@ -79,8 +78,7 @@ static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
 		memcpy(tmpbuf, src, bsize);
 		crypto_xor(iv, src, bsize);
 		crypto_cipher_encrypt_one(tfm, src, iv);
-		memcpy(iv, tmpbuf, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, tmpbuf, src, bsize);
 
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);
@@ -127,8 +125,7 @@ static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
 	do {
 		crypto_cipher_decrypt_one(tfm, dst, src);
 		crypto_xor(dst, iv, bsize);
-		memcpy(iv, src, bsize);
-		crypto_xor(iv, dst, bsize);
+		crypto_xor_cpy(iv, dst, src, bsize);
 
 		src += bsize;
 		dst += bsize;
@@ -153,8 +150,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
 		memcpy(tmpbuf, src, bsize);
 		crypto_cipher_decrypt_one(tfm, src, src);
 		crypto_xor(src, iv, bsize);
-		memcpy(iv, tmpbuf, bsize);
-		crypto_xor(iv, src, bsize);
+		crypto_xor_cpy(iv, src, tmpbuf, bsize);
 
 		src += bsize;
 	} while ((nbytes -= bsize) >= bsize);

+ 4 - 2
crypto/rng.c

@@ -43,12 +43,14 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 		if (!buf)
 			return -ENOMEM;
 
-		get_random_bytes(buf, slen);
+		err = get_random_bytes_wait(buf, slen);
+		if (err)
+			goto out;
 		seed = buf;
 	}
 
 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
-
+out:
 	kzfree(buf);
 	return err;
 }
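
With the change above, the self-seeding path uses get_random_bytes_wait(),
which can fail (for example when the task is interrupted while waiting for
the random pool to initialize), so crypto_rng_reset() now propagates that
error and still kzfree()s the buffer. A caller-side sketch:

	/* Passing a NULL seed makes crypto_rng_reset() seed itself; that
	 * self-seeding may now return an error, so check it. */
	static int seed_rng_sketch(void)
	{
		struct crypto_rng *rng;
		int err;

		rng = crypto_alloc_rng("stdrng", 0, 0);
		if (IS_ERR(rng))
			return PTR_ERR(rng);

		err = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));

		crypto_free_rng(rng);
		return err;
	}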

+ 22 - 33
crypto/scompress.c

@@ -65,11 +65,6 @@ static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
 	seq_puts(m, "type         : scomp\n");
 }
 
-static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
-{
-	return 0;
-}
-
 static void crypto_scomp_free_scratches(void * __percpu *scratches)
 {
 	int i;
@@ -125,12 +120,26 @@ static int crypto_scomp_alloc_all_scratches(void)
 		if (!scomp_src_scratches)
 			return -ENOMEM;
 		scomp_dst_scratches = crypto_scomp_alloc_scratches();
-		if (!scomp_dst_scratches)
+		if (!scomp_dst_scratches) {
+			crypto_scomp_free_scratches(scomp_src_scratches);
+			scomp_src_scratches = NULL;
 			return -ENOMEM;
+		}
 	}
 	return 0;
 }
 
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+	int ret;
+
+	mutex_lock(&scomp_lock);
+	ret = crypto_scomp_alloc_all_scratches();
+	mutex_unlock(&scomp_lock);
+
+	return ret;
+}
+
 static void crypto_scomp_sg_free(struct scatterlist *sgl)
 {
 	int i, n;
@@ -211,9 +220,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
 					      scratch_dst, &req->dlen, *ctx);
 	if (!ret) {
 		if (!req->dst) {
-			req->dst = crypto_scomp_sg_alloc(req->dlen,
-				   req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
-				   GFP_KERNEL : GFP_ATOMIC);
+			req->dst = crypto_scomp_sg_alloc(req->dlen, GFP_ATOMIC);
 			if (!req->dst)
 				goto out;
 		}
@@ -240,6 +247,10 @@ static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
 	struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
 
 	crypto_free_scomp(*ctx);
+
+	mutex_lock(&scomp_lock);
+	crypto_scomp_free_all_scratches();
+	mutex_unlock(&scomp_lock);
 }
 
 int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
@@ -316,40 +327,18 @@ static const struct crypto_type crypto_scomp_type = {
 int crypto_register_scomp(struct scomp_alg *alg)
 {
 	struct crypto_alg *base = &alg->base;
-	int ret = -ENOMEM;
-
-	mutex_lock(&scomp_lock);
-	if (crypto_scomp_alloc_all_scratches())
-		goto error;
 
 	base->cra_type = &crypto_scomp_type;
 	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
 	base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
 
-	ret = crypto_register_alg(base);
-	if (ret)
-		goto error;
-
-	mutex_unlock(&scomp_lock);
-	return ret;
-
-error:
-	crypto_scomp_free_all_scratches();
-	mutex_unlock(&scomp_lock);
-	return ret;
+	return crypto_register_alg(base);
 }
 EXPORT_SYMBOL_GPL(crypto_register_scomp);
 
 int crypto_unregister_scomp(struct scomp_alg *alg)
 {
-	int ret;
-
-	mutex_lock(&scomp_lock);
-	ret = crypto_unregister_alg(&alg->base);
-	crypto_scomp_free_all_scratches();
-	mutex_unlock(&scomp_lock);
-
-	return ret;
+	return crypto_unregister_alg(&alg->base);
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
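
The net effect of the scompress.c changes: the per-cpu scratch buffers are
no longer tied to algorithm (un)registration. crypto_scomp_init_tfm()
allocates them under scomp_lock on first use and
crypto_exit_scomp_ops_async() releases them, so merely registering an
scomp algorithm costs no scratch memory. A minimal sketch of the
deferred-allocation idiom, with alloc_scratches() as a hypothetical
stand-in (the real allocator is guarded so only the first user allocates;
its opening lines fall outside this hunk):

	static DEFINE_MUTEX(example_lock);
	static void *example_scratch;

	static int example_init_tfm(void)
	{
		int ret = 0;

		mutex_lock(&example_lock);
		if (!example_scratch)
			example_scratch = alloc_scratches();	/* hypothetical */
		if (!example_scratch)
			ret = -ENOMEM;
		mutex_unlock(&example_lock);

		return ret;
	}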
 

+ 41 - 36
crypto/serpent_generic.c

@@ -229,6 +229,46 @@
 	x4 ^= x2;					\
 	})
 
+static void __serpent_setkey_sbox(u32 r0, u32 r1, u32 r2, u32 r3, u32 r4, u32 *k)
+{
+	k += 100;
+	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
+	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
+	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
+	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
+	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
+	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
+	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
+	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
+	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
+	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
+	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
+	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
+	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
+	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
+	k -= 50;
+	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
+	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
+	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
+	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
+	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
+	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
+	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
+	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
+	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
+	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
+	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
+	k -= 50;
+	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
+	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
+	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
+	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
+	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
+	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
+	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
+	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
+}
+
 int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 		     unsigned int keylen)
 {
@@ -395,42 +435,7 @@ int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
 	keyiter(k[23], r1, r0, r3, 131, 31);
 
 	/* Apply S-boxes */
-
-	S3(r3, r4, r0, r1, r2); store_and_load_keys(r1, r2, r4, r3, 28, 24);
-	S4(r1, r2, r4, r3, r0); store_and_load_keys(r2, r4, r3, r0, 24, 20);
-	S5(r2, r4, r3, r0, r1); store_and_load_keys(r1, r2, r4, r0, 20, 16);
-	S6(r1, r2, r4, r0, r3); store_and_load_keys(r4, r3, r2, r0, 16, 12);
-	S7(r4, r3, r2, r0, r1); store_and_load_keys(r1, r2, r0, r4, 12, 8);
-	S0(r1, r2, r0, r4, r3); store_and_load_keys(r0, r2, r4, r1, 8, 4);
-	S1(r0, r2, r4, r1, r3); store_and_load_keys(r3, r4, r1, r0, 4, 0);
-	S2(r3, r4, r1, r0, r2); store_and_load_keys(r2, r4, r3, r0, 0, -4);
-	S3(r2, r4, r3, r0, r1); store_and_load_keys(r0, r1, r4, r2, -4, -8);
-	S4(r0, r1, r4, r2, r3); store_and_load_keys(r1, r4, r2, r3, -8, -12);
-	S5(r1, r4, r2, r3, r0); store_and_load_keys(r0, r1, r4, r3, -12, -16);
-	S6(r0, r1, r4, r3, r2); store_and_load_keys(r4, r2, r1, r3, -16, -20);
-	S7(r4, r2, r1, r3, r0); store_and_load_keys(r0, r1, r3, r4, -20, -24);
-	S0(r0, r1, r3, r4, r2); store_and_load_keys(r3, r1, r4, r0, -24, -28);
-	k -= 50;
-	S1(r3, r1, r4, r0, r2); store_and_load_keys(r2, r4, r0, r3, 22, 18);
-	S2(r2, r4, r0, r3, r1); store_and_load_keys(r1, r4, r2, r3, 18, 14);
-	S3(r1, r4, r2, r3, r0); store_and_load_keys(r3, r0, r4, r1, 14, 10);
-	S4(r3, r0, r4, r1, r2); store_and_load_keys(r0, r4, r1, r2, 10, 6);
-	S5(r0, r4, r1, r2, r3); store_and_load_keys(r3, r0, r4, r2, 6, 2);
-	S6(r3, r0, r4, r2, r1); store_and_load_keys(r4, r1, r0, r2, 2, -2);
-	S7(r4, r1, r0, r2, r3); store_and_load_keys(r3, r0, r2, r4, -2, -6);
-	S0(r3, r0, r2, r4, r1); store_and_load_keys(r2, r0, r4, r3, -6, -10);
-	S1(r2, r0, r4, r3, r1); store_and_load_keys(r1, r4, r3, r2, -10, -14);
-	S2(r1, r4, r3, r2, r0); store_and_load_keys(r0, r4, r1, r2, -14, -18);
-	S3(r0, r4, r1, r2, r3); store_and_load_keys(r2, r3, r4, r0, -18, -22);
-	k -= 50;
-	S4(r2, r3, r4, r0, r1); store_and_load_keys(r3, r4, r0, r1, 28, 24);
-	S5(r3, r4, r0, r1, r2); store_and_load_keys(r2, r3, r4, r1, 24, 20);
-	S6(r2, r3, r4, r1, r0); store_and_load_keys(r4, r0, r3, r1, 20, 16);
-	S7(r4, r0, r3, r1, r2); store_and_load_keys(r2, r3, r1, r4, 16, 12);
-	S0(r2, r3, r1, r4, r0); store_and_load_keys(r1, r3, r4, r2, 12, 8);
-	S1(r1, r3, r4, r2, r0); store_and_load_keys(r0, r4, r2, r1, 8, 4);
-	S2(r0, r4, r2, r1, r3); store_and_load_keys(r3, r4, r0, r1, 4, 0);
-	S3(r3, r4, r0, r1, r2); storekeys(r1, r2, r4, r3, 0);
+	__serpent_setkey_sbox(r0, r1, r2, r3, r4, ctx->expkey);
 
 	return 0;
 }

+ 4 - 4
crypto/tcrypt.c

@@ -1404,9 +1404,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		test_cipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 				speed_template_32_40_48);
 		test_cipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
-				speed_template_32_48_64);
+				speed_template_32_64);
 		test_cipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
-				speed_template_32_48_64);
+				speed_template_32_64);
 		test_cipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
 		test_cipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,
@@ -1837,9 +1837,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 		test_acipher_speed("lrw(aes)", DECRYPT, sec, NULL, 0,
 				   speed_template_32_40_48);
 		test_acipher_speed("xts(aes)", ENCRYPT, sec, NULL, 0,
-				   speed_template_32_48_64);
+				   speed_template_32_64);
 		test_acipher_speed("xts(aes)", DECRYPT, sec, NULL, 0,
-				   speed_template_32_48_64);
+				   speed_template_32_64);
 		test_acipher_speed("cts(cbc(aes))", ENCRYPT, sec, NULL, 0,
 				   speed_template_16_24_32);
 		test_acipher_speed("cts(cbc(aes))", DECRYPT, sec, NULL, 0,

+ 16 - 4
drivers/char/hw_random/Kconfig

@@ -13,10 +13,8 @@ menuconfig HW_RANDOM
 	  that's usually called /dev/hwrng, and which exposes one
 	  of possibly several hardware random number generators.
 
-	  These hardware random number generators do not feed directly
-	  into the kernel's random number generator.  That is usually
-	  handled by the "rngd" daemon.  Documentation/hw_random.txt
-	  has more information.
+	  These hardware random number generators do feed into the
+	  kernel's random number generator entropy pool.
 
 	  If unsure, say Y.
 
@@ -255,6 +253,20 @@ config HW_RANDOM_MXC_RNGA
 
 	  If unsure, say Y.
 
+config HW_RANDOM_IMX_RNGC
+	tristate "Freescale i.MX RNGC Random Number Generator"
+	depends on ARCH_MXC
+	default HW_RANDOM
+	---help---
+	  This driver provides kernel-side support for the Random Number
+	  Generator Version C hardware found on some Freescale i.MX
+	  processors. Version B is also supported by this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called imx-rngc.
+
+	  If unsure, say Y.
+
 config HW_RANDOM_NOMADIK
 	tristate "ST-Ericsson Nomadik Random Number Generator support"
 	depends on ARCH_NOMADIK

+ 1 - 0
drivers/char/hw_random/Makefile

@@ -20,6 +20,7 @@ obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o
 obj-$(CONFIG_HW_RANDOM_VIRTIO) += virtio-rng.o
 obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
 obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
+obj-$(CONFIG_HW_RANDOM_IMX_RNGC) += imx-rngc.o
 obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
 obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
 obj-$(CONFIG_HW_RANDOM_PSERIES) += pseries-rng.o

+ 36 - 6
drivers/char/hw_random/core.c

@@ -28,7 +28,10 @@
 #define RNG_MODULE_NAME		"hw_random"
 
 static struct hwrng *current_rng;
+/* the current rng has been explicitly chosen by user via sysfs */
+static int cur_rng_set_by_user;
 static struct task_struct *hwrng_fill;
+/* list of registered rngs, sorted descending by quality */
 static LIST_HEAD(rng_list);
 /* Protects rng_list and current_rng */
 static DEFINE_MUTEX(rng_mutex);
@@ -303,6 +306,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 	list_for_each_entry(rng, &rng_list, list) {
 		if (sysfs_streq(rng->name, buf)) {
 			err = 0;
+			cur_rng_set_by_user = 1;
 			if (rng != current_rng)
 				err = set_current_rng(rng);
 			break;
@@ -351,16 +355,27 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
 	return strlen(buf);
 }
 
+static ssize_t hwrng_attr_selected_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", cur_rng_set_by_user);
+}
+
 static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
 		   hwrng_attr_current_show,
 		   hwrng_attr_current_store);
 static DEVICE_ATTR(rng_available, S_IRUGO,
 		   hwrng_attr_available_show,
 		   NULL);
+static DEVICE_ATTR(rng_selected, S_IRUGO,
+		   hwrng_attr_selected_show,
+		   NULL);
 
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
 	&dev_attr_rng_available.attr,
+	&dev_attr_rng_selected.attr,
 	NULL
 };
 
@@ -417,6 +432,7 @@ int hwrng_register(struct hwrng *rng)
 {
 	int err = -EINVAL;
 	struct hwrng *old_rng, *tmp;
+	struct list_head *rng_list_ptr;
 
 	if (!rng->name || (!rng->data_read && !rng->read))
 		goto out;
@@ -432,14 +448,27 @@ int hwrng_register(struct hwrng *rng)
 	init_completion(&rng->cleanup_done);
 	complete(&rng->cleanup_done);
 
+	/* rng_list is sorted by decreasing quality */
+	list_for_each(rng_list_ptr, &rng_list) {
+		tmp = list_entry(rng_list_ptr, struct hwrng, list);
+		if (tmp->quality < rng->quality)
+			break;
+	}
+	list_add_tail(&rng->list, rng_list_ptr);
+
 	old_rng = current_rng;
 	err = 0;
-	if (!old_rng) {
+	if (!old_rng ||
+	    (!cur_rng_set_by_user && rng->quality > old_rng->quality)) {
+		/*
+		 * Set the new rng as current if no rng is active yet,
+		 * or if the new source has better quality and the
+		 * current one was not pinned by userspace.
+		 */
 		err = set_current_rng(rng);
 		if (err)
 			goto out_unlock;
 	}
-	list_add_tail(&rng->list, &rng_list);
 
 	if (old_rng && !rng->init) {
 		/*
@@ -466,12 +495,13 @@ void hwrng_unregister(struct hwrng *rng)
 	list_del(&rng->list);
 	if (current_rng == rng) {
 		drop_current_rng();
+		cur_rng_set_by_user = 0;
+		/* rng_list is sorted by quality, use the best (=first) one */
 		if (!list_empty(&rng_list)) {
-			struct hwrng *tail;
-
-			tail = list_entry(rng_list.prev, struct hwrng, list);
+			struct hwrng *new_rng;
 
-			set_current_rng(tail);
+			new_rng = list_entry(rng_list.next, struct hwrng, list);
+			set_current_rng(new_rng);
 		}
 	}
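
hwrng_register() now keeps rng_list ordered by descending quality, and a
newcomer preempts current_rng only when it reports higher quality and the
user has not pinned a device via rng_current (a choice reported through
the new rng_selected attribute); hwrng_unregister() can then simply fall
back to the list head. A minimal sketch of the insertion, with a
pared-down entry type:

	struct rng_entry {
		struct list_head list;
		unsigned short quality;
	};

	/* Insert so the list stays sorted, highest quality first. */
	static void insert_by_quality(struct rng_entry *new, struct list_head *head)
	{
		struct rng_entry *tmp;

		list_for_each_entry(tmp, head, list) {
			if (tmp->quality < new->quality) {
				/* list_add_tail() links @new in front of @tmp */
				list_add_tail(&new->list, &tmp->list);
				return;
			}
		}
		list_add_tail(&new->list, head);	/* lowest so far: append */
	}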
 

+ 331 - 0
drivers/char/hw_random/imx-rngc.c

@@ -0,0 +1,331 @@
+/*
+ * RNG driver for Freescale RNGC
+ *
+ * Copyright (C) 2008-2012 Freescale Semiconductor, Inc.
+ * Copyright (C) 2017 Martin Kaiser <martin@kaiser.cx>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/hw_random.h>
+#include <linux/completion.h>
+#include <linux/io.h>
+
+#define RNGC_COMMAND			0x0004
+#define RNGC_CONTROL			0x0008
+#define RNGC_STATUS			0x000C
+#define RNGC_ERROR			0x0010
+#define RNGC_FIFO			0x0014
+
+#define RNGC_CMD_CLR_ERR		0x00000020
+#define RNGC_CMD_CLR_INT		0x00000010
+#define RNGC_CMD_SEED			0x00000002
+#define RNGC_CMD_SELF_TEST		0x00000001
+
+#define RNGC_CTRL_MASK_ERROR		0x00000040
+#define RNGC_CTRL_MASK_DONE		0x00000020
+
+#define RNGC_STATUS_ERROR		0x00010000
+#define RNGC_STATUS_FIFO_LEVEL_MASK	0x00000f00
+#define RNGC_STATUS_FIFO_LEVEL_SHIFT	8
+#define RNGC_STATUS_SEED_DONE		0x00000020
+#define RNGC_STATUS_ST_DONE		0x00000010
+
+#define RNGC_ERROR_STATUS_STAT_ERR	0x00000008
+
+#define RNGC_TIMEOUT  3000 /* 3 sec */
+
+
+static bool self_test = true;
+module_param(self_test, bool, 0);
+
+struct imx_rngc {
+	struct device		*dev;
+	struct clk		*clk;
+	void __iomem		*base;
+	struct hwrng		rng;
+	struct completion	rng_op_done;
+	/*
+	 * err_reg is written only by the irq handler and read only
+	 * when interrupts are masked, so no spinlock is needed
+	 */
+	u32			err_reg;
+};
+
+
+static inline void imx_rngc_irq_mask_clear(struct imx_rngc *rngc)
+{
+	u32 ctrl, cmd;
+
+	/* mask interrupts */
+	ctrl = readl(rngc->base + RNGC_CONTROL);
+	ctrl |= RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR;
+	writel(ctrl, rngc->base + RNGC_CONTROL);
+
+	/*
+	 * CLR_INT clears the interrupt only if there's no error
+	 * CLR_ERR clears the interrupt and the error register if there
+	 * is an error
+	 */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	cmd |= RNGC_CMD_CLR_INT | RNGC_CMD_CLR_ERR;
+	writel(cmd, rngc->base + RNGC_COMMAND);
+}
+
+static inline void imx_rngc_irq_unmask(struct imx_rngc *rngc)
+{
+	u32 ctrl;
+
+	ctrl = readl(rngc->base + RNGC_CONTROL);
+	ctrl &= ~(RNGC_CTRL_MASK_DONE | RNGC_CTRL_MASK_ERROR);
+	writel(ctrl, rngc->base + RNGC_CONTROL);
+}
+
+static int imx_rngc_self_test(struct imx_rngc *rngc)
+{
+	u32 cmd;
+	int ret;
+
+	imx_rngc_irq_unmask(rngc);
+
+	/* run self test */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND);
+
+	ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT);
+	if (!ret) {
+		imx_rngc_irq_mask_clear(rngc);
+		return -ETIMEDOUT;
+	}
+
+	if (rngc->err_reg != 0)
+		return -EIO;
+
+	return 0;
+}
+
+static int imx_rngc_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+	unsigned int status;
+	unsigned int level;
+	int retval = 0;
+
+	while (max >= sizeof(u32)) {
+		status = readl(rngc->base + RNGC_STATUS);
+
+		/* is there some error while reading this random number? */
+		if (status & RNGC_STATUS_ERROR)
+			break;
+
+		/* how many random numbers are in the FIFO? [0-15] */
+		level = (status & RNGC_STATUS_FIFO_LEVEL_MASK) >>
+			RNGC_STATUS_FIFO_LEVEL_SHIFT;
+
+		if (level) {
+			/* retrieve a random number from FIFO */
+			*(u32 *)data = readl(rngc->base + RNGC_FIFO);
+
+			retval += sizeof(u32);
+			data += sizeof(u32);
+			max -= sizeof(u32);
+		}
+	}
+
+	return retval ? retval : -EIO;
+}
+
+static irqreturn_t imx_rngc_irq(int irq, void *priv)
+{
+	struct imx_rngc *rngc = (struct imx_rngc *)priv;
+	u32 status;
+
+	/*
+	 * clearing the interrupt will also clear the error register;
+	 * read the error and status registers before clearing
+	 */
+	status = readl(rngc->base + RNGC_STATUS);
+	rngc->err_reg = readl(rngc->base + RNGC_ERROR);
+
+	imx_rngc_irq_mask_clear(rngc);
+
+	if (status & (RNGC_STATUS_SEED_DONE | RNGC_STATUS_ST_DONE))
+		complete(&rngc->rng_op_done);
+
+	return IRQ_HANDLED;
+}
+
+static int imx_rngc_init(struct hwrng *rng)
+{
+	struct imx_rngc *rngc = container_of(rng, struct imx_rngc, rng);
+	u32 cmd;
+	int ret;
+
+	/* clear error */
+	cmd = readl(rngc->base + RNGC_COMMAND);
+	writel(cmd | RNGC_CMD_CLR_ERR, rngc->base + RNGC_COMMAND);
+
+	/* create seed, repeat while there is some statistical error */
+	do {
+		imx_rngc_irq_unmask(rngc);
+
+		/* seed creation */
+		cmd = readl(rngc->base + RNGC_COMMAND);
+		writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND);
+
+		ret = wait_for_completion_timeout(&rngc->rng_op_done,
+				RNGC_TIMEOUT);
+
+		if (!ret) {
+			imx_rngc_irq_mask_clear(rngc);
+			return -ETIMEDOUT;
+		}
+
+	} while (rngc->err_reg == RNGC_ERROR_STATUS_STAT_ERR);
+
+	return rngc->err_reg ? -EIO : 0;
+}
+
+static int imx_rngc_probe(struct platform_device *pdev)
+{
+	struct imx_rngc *rngc;
+	struct resource *res;
+	int ret;
+	int irq;
+
+	rngc = devm_kzalloc(&pdev->dev, sizeof(*rngc), GFP_KERNEL);
+	if (!rngc)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rngc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(rngc->base))
+		return PTR_ERR(rngc->base);
+
+	rngc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rngc->clk)) {
+		dev_err(&pdev->dev, "Can not get rng_clk\n");
+		return PTR_ERR(rngc->clk);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "Couldn't get irq %d\n", irq);
+		return irq;
+	}
+
+	ret = clk_prepare_enable(rngc->clk);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(&pdev->dev,
+			irq, imx_rngc_irq, 0, pdev->name, (void *)rngc);
+	if (ret) {
+		dev_err(rngc->dev, "Can't get interrupt working.\n");
+		goto err;
+	}
+
+	init_completion(&rngc->rng_op_done);
+
+	rngc->rng.name = pdev->name;
+	rngc->rng.init = imx_rngc_init;
+	rngc->rng.read = imx_rngc_read;
+
+	rngc->dev = &pdev->dev;
+	platform_set_drvdata(pdev, rngc);
+
+	imx_rngc_irq_mask_clear(rngc);
+
+	if (self_test) {
+		ret = imx_rngc_self_test(rngc);
+		if (ret) {
+			dev_err(rngc->dev, "FSL RNGC self test failed.\n");
+			goto err;
+		}
+	}
+
+	ret = hwrng_register(&rngc->rng);
+	if (ret) {
+		dev_err(&pdev->dev, "FSL RNGC registering failed (%d)\n", ret);
+		goto err;
+	}
+
+	dev_info(&pdev->dev, "Freescale RNGC registered.\n");
+	return 0;
+
+err:
+	clk_disable_unprepare(rngc->clk);
+
+	return ret;
+}
+
+static int __exit imx_rngc_remove(struct platform_device *pdev)
+{
+	struct imx_rngc *rngc = platform_get_drvdata(pdev);
+
+	hwrng_unregister(&rngc->rng);
+
+	clk_disable_unprepare(rngc->clk);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int imx_rngc_suspend(struct device *dev)
+{
+	struct imx_rngc *rngc = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(rngc->clk);
+
+	return 0;
+}
+
+static int imx_rngc_resume(struct device *dev)
+{
+	struct imx_rngc *rngc = dev_get_drvdata(dev);
+
+	clk_prepare_enable(rngc->clk);
+
+	return 0;
+}
+
+static const struct dev_pm_ops imx_rngc_pm_ops = {
+	.suspend	= imx_rngc_suspend,
+	.resume		= imx_rngc_resume,
+};
+#endif
+
+static const struct of_device_id imx_rngc_dt_ids[] = {
+	{ .compatible = "fsl,imx25-rngb", .data = NULL, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_rngc_dt_ids);
+
+static struct platform_driver imx_rngc_driver = {
+	.driver = {
+		.name = "imx_rngc",
+#ifdef CONFIG_PM
+		.pm = &imx_rngc_pm_ops,
+#endif
+		.of_match_table = imx_rngc_dt_ids,
+	},
+	.remove = __exit_p(imx_rngc_remove),
+};
+
+module_platform_driver_probe(imx_rngc_driver, imx_rngc_probe);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("H/W RNGC driver for i.MX");
+MODULE_LICENSE("GPL");

+ 46 - 3
drivers/crypto/Kconfig

@@ -525,12 +525,26 @@ config CRYPTO_DEV_ATMEL_SHA
 	  To compile this driver as a module, choose M here: the module
 	  will be called atmel-sha.
 
+config CRYPTO_DEV_ATMEL_ECC
+	tristate "Support for Microchip / Atmel ECC hw accelerator"
+	depends on ARCH_AT91 || COMPILE_TEST
+	depends on I2C
+	select CRYPTO_ECDH
+	select CRC16
+	help
+	  Microchip / Atmel ECC hw accelerator.
+	  Select this if you want to use the Microchip / Atmel module for
+	  ECDH algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-ecc.
+
 config CRYPTO_DEV_CCP
-	bool "Support for AMD Cryptographic Coprocessor"
+	bool "Support for AMD Secure Processor"
 	depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
 	help
-	  The AMD Cryptographic Coprocessor provides hardware offload support
-	  for encryption, hashing and related operations.
+	  The AMD Secure Processor provides support for the Cryptographic Coprocessor
+	  (CCP) and the Platform Security Processor (PSP) devices.
 
 if CRYPTO_DEV_CCP
 	source "drivers/crypto/ccp/Kconfig"
@@ -616,6 +630,14 @@ config CRYPTO_DEV_SUN4I_SS
 	  To compile this driver as a module, choose M here: the module
 	  will be called sun4i-ss.
 
+config CRYPTO_DEV_SUN4I_SS_PRNG
+	bool "Support for Allwinner Security System PRNG"
+	depends on CRYPTO_DEV_SUN4I_SS
+	select CRYPTO_RNG
+	help
+	  Select this option if you want to provide kernel-side support for
+	  the Pseudo-Random Number Generator found in the Security System.
+
 config CRYPTO_DEV_ROCKCHIP
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
@@ -686,4 +708,25 @@ config CRYPTO_DEV_SAFEXCEL
 	  chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash
 	  algorithms.
 
+config CRYPTO_DEV_ARTPEC6
+	tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
+	depends on ARM && (ARCH_ARTPEC || COMPILE_TEST)
+	depends on HAS_DMA
+	depends on OF
+	select CRYPTO_AEAD
+	select CRYPTO_AES
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_CTR
+	select CRYPTO_HASH
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA384
+	select CRYPTO_SHA512
+	help
+	  Enables the driver for the on-chip crypto accelerator
+	  of Axis ARTPEC SoCs.
+
+	  To compile this driver as a module, choose M here.
+
 endif # CRYPTO_HW

+ 3 - 1
drivers/crypto/Makefile

@@ -1,6 +1,7 @@
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_ECC) += atmel-ecc.o
 obj-$(CONFIG_CRYPTO_DEV_BFIN_CRC) += bfin_crc.o
 obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
@@ -35,7 +36,7 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
-obj-$(CONFIG_CRYPTO_DEV_STM32) += stm32/
+obj-$(CONFIG_ARCH_STM32) += stm32/
 obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
@@ -43,3 +44,4 @@ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
+obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/

+ 781 - 0
drivers/crypto/atmel-ecc.c

@@ -0,0 +1,781 @@
+/*
+ * Microchip / Atmel ECC (I2C) driver.
+ *
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/bitrev.h>
+#include <linux/crc16.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/ecdh.h>
+#include <crypto/kpp.h>
+#include "atmel-ecc.h"
+
+/* Used for binding tfm objects to i2c clients. */
+struct atmel_ecc_driver_data {
+	struct list_head i2c_client_list;
+	spinlock_t i2c_list_lock;
+} ____cacheline_aligned;
+
+static struct atmel_ecc_driver_data driver_data;
+
+/**
+ * atmel_ecc_i2c_client_priv - i2c_client private data
+ * @client              : pointer to i2c client device
+ * @i2c_client_list_node: part of i2c_client_list
+ * @lock                : lock for sending i2c commands
+ * @wake_token          : wake token array of zeros
+ * @wake_token_sz       : size in bytes of the wake_token
+ * @tfm_count           : number of active crypto transformations on i2c client
+ *
+ * Reads and writes from/to the i2c client are sequential. The first byte
+ * transmitted to the device is treated as the byte size. Any attempt to send
+ * more than this number of bytes will cause the device to not ACK those bytes.
+ * After the host writes a single command byte to the input buffer, reads are
+ * prohibited until after the device completes command execution. Use a mutex
+ * when sending i2c commands.
+ */
+struct atmel_ecc_i2c_client_priv {
+	struct i2c_client *client;
+	struct list_head i2c_client_list_node;
+	struct mutex lock;
+	u8 wake_token[WAKE_TOKEN_MAX_SIZE];
+	size_t wake_token_sz;
+	atomic_t tfm_count ____cacheline_aligned;
+};
+
+/**
+ * atmel_ecdh_ctx - transformation context
+ * @client     : pointer to i2c client device
+ * @fallback   : used for unsupported curves or when the user wants to use
+ *               their own private key.
+ * @public_key : generated when calling set_secret(). It's the responsibility
+ *               of the user to not call set_secret() while
+ *               generate_public_key() or compute_shared_secret() are in flight.
+ * @curve_id   : elliptic curve id
+ * @n_sz       : size in bytes of the n prime
+ * @do_fallback: true when the device doesn't support the curve or when the user
+ *               wants to use their own private key.
+ */
+struct atmel_ecdh_ctx {
+	struct i2c_client *client;
+	struct crypto_kpp *fallback;
+	const u8 *public_key;
+	unsigned int curve_id;
+	size_t n_sz;
+	bool do_fallback;
+};
+
+/**
+ * atmel_ecc_work_data - data structure representing the work
+ * @ctx : transformation context.
+ * @cbk : pointer to a callback function to be invoked upon completion of this
+ *        request. This has the form:
+ *        callback(struct atmel_ecc_work_data *work_data, void *areq, u8 status)
+ *        where:
+ *        @work_data: data structure representing the work
+ *        @areq     : optional pointer to an argument passed with the original
+ *                    request.
+ *        @status   : status returned from the i2c client device or i2c error.
+ * @areq: optional pointer to a user argument for use at callback time.
+ * @work: describes the task to be executed.
+ * @cmd : structure used for communicating with the device.
+ */
+struct atmel_ecc_work_data {
+	struct atmel_ecdh_ctx *ctx;
+	void (*cbk)(struct atmel_ecc_work_data *work_data, void *areq,
+		    int status);
+	void *areq;
+	struct work_struct work;
+	struct atmel_ecc_cmd cmd;
+};
+
+static u16 atmel_ecc_crc16(u16 crc, const u8 *buffer, size_t len)
+{
+	return cpu_to_le16(bitrev16(crc16(crc, buffer, len)));
+}
+
+/**
+ * atmel_ecc_checksum() - Generate 16-bit CRC as required by ATMEL ECC.
+ * CRC16 verification of the count, opcode, param1, param2 and data bytes.
+ * The checksum is saved in little-endian format in the least significant
+ * two bytes of the command. CRC polynomial is 0x8005 and the initial register
+ * value should be zero.
+ *
+ * @cmd : structure used for communicating with the device.
+ */
+static void atmel_ecc_checksum(struct atmel_ecc_cmd *cmd)
+{
+	u8 *data = &cmd->count;
+	size_t len = cmd->count - CRC_SIZE;
+	u16 *crc16 = (u16 *)(data + len);
+
+	*crc16 = atmel_ecc_crc16(0, data, len);
+}
+
+static void atmel_ecc_init_read_cmd(struct atmel_ecc_cmd *cmd)
+{
+	cmd->word_addr = COMMAND;
+	cmd->opcode = OPCODE_READ;
+	/*
+	 * Read the word from Configuration zone that contains the lock bytes
+	 * (UserExtra, Selector, LockValue, LockConfig).
+	 */
+	cmd->param1 = CONFIG_ZONE;
+	cmd->param2 = DEVICE_LOCK_ADDR;
+	cmd->count = READ_COUNT;
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_READ;
+	cmd->rxsize = READ_RSP_SIZE;
+}
+
+static void atmel_ecc_init_genkey_cmd(struct atmel_ecc_cmd *cmd, u16 keyid)
+{
+	cmd->word_addr = COMMAND;
+	cmd->count = GENKEY_COUNT;
+	cmd->opcode = OPCODE_GENKEY;
+	cmd->param1 = GENKEY_MODE_PRIVATE;
+	/* a random private key will be generated and stored in slot keyID */
+	cmd->param2 = cpu_to_le16(keyid);
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_GENKEY;
+	cmd->rxsize = GENKEY_RSP_SIZE;
+}
+
+static int atmel_ecc_init_ecdh_cmd(struct atmel_ecc_cmd *cmd,
+				   struct scatterlist *pubkey)
+{
+	size_t copied;
+
+	cmd->word_addr = COMMAND;
+	cmd->count = ECDH_COUNT;
+	cmd->opcode = OPCODE_ECDH;
+	cmd->param1 = ECDH_PREFIX_MODE;
+	/* private key slot */
+	cmd->param2 = cpu_to_le16(DATA_SLOT_2);
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	copied = sg_copy_to_buffer(pubkey, 1, cmd->data, ATMEL_ECC_PUBKEY_SIZE);
+	if (copied != ATMEL_ECC_PUBKEY_SIZE)
+		return -EINVAL;
+
+	atmel_ecc_checksum(cmd);
+
+	cmd->msecs = MAX_EXEC_TIME_ECDH;
+	cmd->rxsize = ECDH_RSP_SIZE;
+
+	return 0;
+}
+
+/*
+ * After wake and after execution of a command, there will be error, status, or
+ * result bytes in the device's output register that can be retrieved by the
+ * system. When the length of that group is four bytes, the codes returned are
+ * detailed in error_list.
+ */
+static int atmel_ecc_status(struct device *dev, u8 *status)
+{
+	size_t err_list_len = ARRAY_SIZE(error_list);
+	int i;
+	u8 err_id = status[1];
+
+	if (*status != STATUS_SIZE)
+		return 0;
+
+	if (err_id == STATUS_WAKE_SUCCESSFUL || err_id == STATUS_NOERR)
+		return 0;
+
+	for (i = 0; i < err_list_len; i++)
+		if (error_list[i].value == err_id)
+			break;
+
+	/* if err_id is not in the error_list then ignore it */
+	if (i != err_list_len) {
+		dev_err(dev, "%02x: %s:\n", err_id, error_list[i].error_text);
+		return err_id;
+	}
+
+	return 0;
+}
+
+static int atmel_ecc_wakeup(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+	u8 status[STATUS_RSP_SIZE];
+	int ret;
+
+	/*
+	 * The device ignores any levels or transitions on the SCL pin when the
+	 * device is idle, asleep or during waking up. Don't check for error
+	 * when waking up the device.
+	 */
+	i2c_master_send(client, i2c_priv->wake_token, i2c_priv->wake_token_sz);
+
+	/*
+	 * Wait to wake the device. Typical execution times for ecdh and genkey
+	 * are around tens of milliseconds. The delta is chosen as 50 microseconds.
+	 */
+	usleep_range(TWHI_MIN, TWHI_MAX);
+
+	ret = i2c_master_recv(client, status, STATUS_SIZE);
+	if (ret < 0)
+		return ret;
+
+	return atmel_ecc_status(&client->dev, status);
+}
+
+static int atmel_ecc_sleep(struct i2c_client *client)
+{
+	u8 sleep = SLEEP_TOKEN;
+
+	return i2c_master_send(client, &sleep, 1);
+}
+
+static void atmel_ecdh_done(struct atmel_ecc_work_data *work_data, void *areq,
+			    int status)
+{
+	struct kpp_request *req = areq;
+	struct atmel_ecdh_ctx *ctx = work_data->ctx;
+	struct atmel_ecc_cmd *cmd = &work_data->cmd;
+	size_t copied;
+	size_t n_sz = ctx->n_sz;
+
+	if (status)
+		goto free_work_data;
+
+	/* copy the shared secret */
+	copied = sg_copy_from_buffer(req->dst, 1, &cmd->data[RSP_DATA_IDX],
+				     n_sz);
+	if (copied != n_sz)
+		status = -EINVAL;
+
+	/* fall through */
+free_work_data:
+	kzfree(work_data);
+	kpp_request_complete(req, status);
+}
+
+/*
+ * atmel_ecc_send_receive() - send a command to the device and receive its
+ *                            response.
+ * @client: i2c client device
+ * @cmd   : structure used to communicate with the device
+ *
+ * After the device receives a Wake token, a watchdog counter starts within the
+ * device. After the watchdog timer expires, the device enters sleep mode
+ * regardless of whether some I/O transmission or command execution is in
+ * progress. If a command is attempted when insufficient time remains prior to
+ * watchdog timer execution, the device will return the watchdog timeout error
+ * code without attempting to execute the command. There is no way to reset the
+ * counter other than to put the device into sleep or idle mode and then
+ * wake it up again.
+ */
+static int atmel_ecc_send_receive(struct i2c_client *client,
+				  struct atmel_ecc_cmd *cmd)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+	int ret;
+
+	mutex_lock(&i2c_priv->lock);
+
+	ret = atmel_ecc_wakeup(client);
+	if (ret)
+		goto err;
+
+	/* send the command */
+	ret = i2c_master_send(client, (u8 *)cmd, cmd->count + WORD_ADDR_SIZE);
+	if (ret < 0)
+		goto err;
+
+	/* delay the appropriate amount of time for command to execute */
+	msleep(cmd->msecs);
+
+	/* receive the response */
+	ret = i2c_master_recv(client, cmd->data, cmd->rxsize);
+	if (ret < 0)
+		goto err;
+
+	/* put the device into low-power mode */
+	ret = atmel_ecc_sleep(client);
+	if (ret < 0)
+		goto err;
+
+	mutex_unlock(&i2c_priv->lock);
+	return atmel_ecc_status(&client->dev, cmd->data);
+err:
+	mutex_unlock(&i2c_priv->lock);
+	return ret;
+}
+
+static void atmel_ecc_work_handler(struct work_struct *work)
+{
+	struct atmel_ecc_work_data *work_data =
+			container_of(work, struct atmel_ecc_work_data, work);
+	struct atmel_ecc_cmd *cmd = &work_data->cmd;
+	struct i2c_client *client = work_data->ctx->client;
+	int status;
+
+	status = atmel_ecc_send_receive(client, cmd);
+	work_data->cbk(work_data, work_data->areq, status);
+}
+
+static void atmel_ecc_enqueue(struct atmel_ecc_work_data *work_data,
+			      void (*cbk)(struct atmel_ecc_work_data *work_data,
+					  void *areq, int status),
+			      void *areq)
+{
+	work_data->cbk = (void *)cbk;
+	work_data->areq = areq;
+
+	INIT_WORK(&work_data->work, atmel_ecc_work_handler);
+	schedule_work(&work_data->work);
+}
+
+static unsigned int atmel_ecdh_supported_curve(unsigned int curve_id)
+{
+	if (curve_id == ECC_CURVE_NIST_P256)
+		return ATMEL_ECC_NIST_P256_N_SIZE;
+
+	return 0;
+}
+
+/*
+ * A random private key is generated and stored in the device. The device
+ * returns the pair public key.
+ */
+static int atmel_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
+				 unsigned int len)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct atmel_ecc_cmd *cmd;
+	void *public_key;
+	struct ecdh params;
+	int ret = -ENOMEM;
+
+	/* free the old public key, if any */
+	kfree(ctx->public_key);
+	/* make sure you don't free the old public key twice */
+	ctx->public_key = NULL;
+
+	if (crypto_ecdh_decode_key(buf, len, &params) < 0) {
+		dev_err(&ctx->client->dev, "crypto_ecdh_decode_key failed\n");
+		return -EINVAL;
+	}
+
+	ctx->n_sz = atmel_ecdh_supported_curve(params.curve_id);
+	if (!ctx->n_sz || params.key_size) {
+		/* fallback to ecdh software implementation */
+		ctx->do_fallback = true;
+		return crypto_kpp_set_secret(ctx->fallback, buf, len);
+	}
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	public_key = kmalloc(ATMEL_ECC_PUBKEY_SIZE, GFP_KERNEL);
+	if (!public_key)
+		goto free_cmd;
+
+	ctx->do_fallback = false;
+	ctx->curve_id = params.curve_id;
+
+	atmel_ecc_init_genkey_cmd(cmd, DATA_SLOT_2);
+
+	ret = atmel_ecc_send_receive(ctx->client, cmd);
+	if (ret)
+		goto free_public_key;
+
+	/* save the public key */
+	memcpy(public_key, &cmd->data[RSP_DATA_IDX], ATMEL_ECC_PUBKEY_SIZE);
+	ctx->public_key = public_key;
+
+	kfree(cmd);
+	return 0;
+
+free_public_key:
+	kfree(public_key);
+free_cmd:
+	kfree(cmd);
+	return ret;
+}
+
+static int atmel_ecdh_generate_public_key(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	size_t copied;
+	int ret = 0;
+
+	if (ctx->do_fallback) {
+		kpp_request_set_tfm(req, ctx->fallback);
+		return crypto_kpp_generate_public_key(req);
+	}
+
+	/* public key was saved at private key generation */
+	copied = sg_copy_from_buffer(req->dst, 1, ctx->public_key,
+				     ATMEL_ECC_PUBKEY_SIZE);
+	if (copied != ATMEL_ECC_PUBKEY_SIZE)
+		ret = -EINVAL;
+
+	return ret;
+}
+
+static int atmel_ecdh_compute_shared_secret(struct kpp_request *req)
+{
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+	struct atmel_ecc_work_data *work_data;
+	gfp_t gfp;
+	int ret;
+
+	if (ctx->do_fallback) {
+		kpp_request_set_tfm(req, ctx->fallback);
+		return crypto_kpp_compute_shared_secret(req);
+	}
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL :
+							     GFP_ATOMIC;
+
+	work_data = kmalloc(sizeof(*work_data), gfp);
+	if (!work_data)
+		return -ENOMEM;
+
+	work_data->ctx = ctx;
+
+	ret = atmel_ecc_init_ecdh_cmd(&work_data->cmd, req->src);
+	if (ret)
+		goto free_work_data;
+
+	atmel_ecc_enqueue(work_data, atmel_ecdh_done, req);
+
+	return -EINPROGRESS;
+
+free_work_data:
+	kfree(work_data);
+	return ret;
+}
+
+static struct i2c_client *atmel_ecc_i2c_client_alloc(void)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv, *min_i2c_priv = NULL;
+	struct i2c_client *client = ERR_PTR(-ENODEV);
+	int min_tfm_cnt = INT_MAX;
+	int tfm_cnt;
+
+	spin_lock(&driver_data.i2c_list_lock);
+
+	if (list_empty(&driver_data.i2c_client_list)) {
+		spin_unlock(&driver_data.i2c_list_lock);
+		return ERR_PTR(-ENODEV);
+	}
+
+	list_for_each_entry(i2c_priv, &driver_data.i2c_client_list,
+			    i2c_client_list_node) {
+		tfm_cnt = atomic_read(&i2c_priv->tfm_count);
+		if (tfm_cnt < min_tfm_cnt) {
+			min_tfm_cnt = tfm_cnt;
+			min_i2c_priv = i2c_priv;
+		}
+		if (!min_tfm_cnt)
+			break;
+	}
+
+	if (min_i2c_priv) {
+		atomic_inc(&min_i2c_priv->tfm_count);
+		client = min_i2c_priv->client;
+	}
+
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	return client;
+}
+
+static void atmel_ecc_i2c_client_free(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	atomic_dec(&i2c_priv->tfm_count);
+}
+
+static int atmel_ecdh_init_tfm(struct crypto_kpp *tfm)
+{
+	const char *alg = kpp_alg_name(tfm);
+	struct crypto_kpp *fallback;
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	ctx->client = atmel_ecc_i2c_client_alloc();
+	if (IS_ERR(ctx->client)) {
+		pr_err("tfm - i2c_client binding failed\n");
+		return PTR_ERR(ctx->client);
+	}
+
+	fallback = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback)) {
+		dev_err(&ctx->client->dev, "Failed to allocate transformation for '%s': %ld\n",
+			alg, PTR_ERR(fallback));
+		return PTR_ERR(fallback);
+	}
+
+	crypto_kpp_set_flags(fallback, crypto_kpp_get_flags(tfm));
+
+	dev_info(&ctx->client->dev, "Using '%s' as fallback implementation.\n",
+		 crypto_tfm_alg_driver_name(crypto_kpp_tfm(fallback)));
+
+	ctx->fallback = fallback;
+
+	return 0;
+}
+
+static void atmel_ecdh_exit_tfm(struct crypto_kpp *tfm)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	kfree(ctx->public_key);
+	crypto_free_kpp(ctx->fallback);
+	atmel_ecc_i2c_client_free(ctx->client);
+}
+
+static unsigned int atmel_ecdh_max_size(struct crypto_kpp *tfm)
+{
+	struct atmel_ecdh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	if (ctx->fallback)
+		return crypto_kpp_maxsize(ctx->fallback);
+
+	/*
+	 * The device only supports NIST P256 ECC keys. The public key size will
+	 * always be the same. Use a macro for the key size to avoid unnecessary
+	 * computations.
+	 */
+	return ATMEL_ECC_PUBKEY_SIZE;
+}
+
+static struct kpp_alg atmel_ecdh = {
+	.set_secret = atmel_ecdh_set_secret,
+	.generate_public_key = atmel_ecdh_generate_public_key,
+	.compute_shared_secret = atmel_ecdh_compute_shared_secret,
+	.init = atmel_ecdh_init_tfm,
+	.exit = atmel_ecdh_exit_tfm,
+	.max_size = atmel_ecdh_max_size,
+	.base = {
+		.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+		.cra_name = "ecdh",
+		.cra_driver_name = "atmel-ecdh",
+		.cra_priority = ATMEL_ECC_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = sizeof(struct atmel_ecdh_ctx),
+	},
+};
+
+static inline size_t atmel_ecc_wake_token_sz(u32 bus_clk_rate)
+{
+	u32 no_of_bits = DIV_ROUND_UP(TWLO_USEC * bus_clk_rate, USEC_PER_SEC);
+
+	/* return the size of the wake_token in bytes */
+	return DIV_ROUND_UP(no_of_bits, 8);
+}
+
+static int device_sanity_check(struct i2c_client *client)
+{
+	struct atmel_ecc_cmd *cmd;
+	int ret;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	atmel_ecc_init_read_cmd(cmd);
+
+	ret = atmel_ecc_send_receive(client, cmd);
+	if (ret)
+		goto free_cmd;
+
+	/*
+	 * It is vital that the Configuration, Data and OTP zones be locked
+	 * prior to release into the field of the system containing the device.
+	 * Failure to lock these zones may permit modification of any secret
+	 * keys and may lead to other security problems.
+	 */
+	if (cmd->data[LOCK_CONFIG_IDX] || cmd->data[LOCK_VALUE_IDX]) {
+		dev_err(&client->dev, "Configuration or Data and OTP zones are unlocked!\n");
+		ret = -ENOTSUPP;
+	}
+
+	/* fall through */
+free_cmd:
+	kfree(cmd);
+	return ret;
+}
+
+static int atmel_ecc_probe(struct i2c_client *client,
+			   const struct i2c_device_id *id)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv;
+	struct device *dev = &client->dev;
+	int ret;
+	u32 bus_clk_rate;
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(dev, "I2C_FUNC_I2C not supported\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(client->adapter->dev.of_node,
+				   "clock-frequency", &bus_clk_rate);
+	if (ret) {
+		dev_err(dev, "of: failed to read clock-frequency property\n");
+		return ret;
+	}
+
+	if (bus_clk_rate > 1000000L) {
+		dev_err(dev, "%d exceeds maximum supported clock frequency (1MHz)\n",
+			bus_clk_rate);
+		return -EINVAL;
+	}
+
+	i2c_priv = devm_kmalloc(dev, sizeof(*i2c_priv), GFP_KERNEL);
+	if (!i2c_priv)
+		return -ENOMEM;
+
+	i2c_priv->client = client;
+	mutex_init(&i2c_priv->lock);
+
+	/*
+	 * WAKE_TOKEN_MAX_SIZE was calculated for the maximum bus_clk_rate of
+	 * 1 MHz. The previous bus_clk_rate check ensures that wake_token_sz
+	 * will always be smaller than or equal to WAKE_TOKEN_MAX_SIZE.
+	 */
+	i2c_priv->wake_token_sz = atmel_ecc_wake_token_sz(bus_clk_rate);
+
+	memset(i2c_priv->wake_token, 0, sizeof(i2c_priv->wake_token));
+
+	atomic_set(&i2c_priv->tfm_count, 0);
+
+	i2c_set_clientdata(client, i2c_priv);
+
+	ret = device_sanity_check(client);
+	if (ret)
+		return ret;
+
+	spin_lock(&driver_data.i2c_list_lock);
+	list_add_tail(&i2c_priv->i2c_client_list_node,
+		      &driver_data.i2c_client_list);
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	ret = crypto_register_kpp(&atmel_ecdh);
+	if (ret) {
+		spin_lock(&driver_data.i2c_list_lock);
+		list_del(&i2c_priv->i2c_client_list_node);
+		spin_unlock(&driver_data.i2c_list_lock);
+
+		dev_err(dev, "%s alg registration failed\n",
+			atmel_ecdh.base.cra_driver_name);
+	} else {
+		dev_info(dev, "atmel ecc algorithms registered in /proc/crypto\n");
+	}
+
+	return ret;
+}
+
+static int atmel_ecc_remove(struct i2c_client *client)
+{
+	struct atmel_ecc_i2c_client_priv *i2c_priv = i2c_get_clientdata(client);
+
+	/* Return -EBUSY if the i2c client is still in use. */
+	if (atomic_read(&i2c_priv->tfm_count)) {
+		dev_err(&client->dev, "Device is busy\n");
+		return -EBUSY;
+	}
+
+	crypto_unregister_kpp(&atmel_ecdh);
+
+	spin_lock(&driver_data.i2c_list_lock);
+	list_del(&i2c_priv->i2c_client_list_node);
+	spin_unlock(&driver_data.i2c_list_lock);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id atmel_ecc_dt_ids[] = {
+	{
+		.compatible = "atmel,atecc508a",
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_ecc_dt_ids);
+#endif
+
+static const struct i2c_device_id atmel_ecc_id[] = {
+	{ "atecc508a", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, atmel_ecc_id);
+
+static struct i2c_driver atmel_ecc_driver = {
+	.driver = {
+		.name	= "atmel-ecc",
+		.of_match_table = of_match_ptr(atmel_ecc_dt_ids),
+	},
+	.probe		= atmel_ecc_probe,
+	.remove		= atmel_ecc_remove,
+	.id_table	= atmel_ecc_id,
+};
+
+static int __init atmel_ecc_init(void)
+{
+	spin_lock_init(&driver_data.i2c_list_lock);
+	INIT_LIST_HEAD(&driver_data.i2c_client_list);
+	return i2c_add_driver(&atmel_ecc_driver);
+}
+
+static void __exit atmel_ecc_exit(void)
+{
+	flush_scheduled_work();
+	i2c_del_driver(&atmel_ecc_driver);
+}
+
+module_init(atmel_ecc_init);
+module_exit(atmel_ecc_exit);
+
+MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
+MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver");
+MODULE_LICENSE("GPL v2");

+ 128 - 0
drivers/crypto/atmel-ecc.h

@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017, Microchip Technology Inc.
+ * Author: Tudor Ambarus <tudor.ambarus@microchip.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __ATMEL_ECC_H__
+#define __ATMEL_ECC_H__
+
+#define ATMEL_ECC_PRIORITY		300
+
+#define COMMAND				0x03 /* packet function */
+#define SLEEP_TOKEN			0x01
+#define WAKE_TOKEN_MAX_SIZE		8
+
+/* Definitions of Data and Command sizes */
+#define WORD_ADDR_SIZE			1
+#define COUNT_SIZE			1
+#define CRC_SIZE			2
+#define CMD_OVERHEAD_SIZE		(COUNT_SIZE + CRC_SIZE)
+
+/* size in bytes of the n prime */
+#define ATMEL_ECC_NIST_P256_N_SIZE	32
+#define ATMEL_ECC_PUBKEY_SIZE		(2 * ATMEL_ECC_NIST_P256_N_SIZE)
+
+#define STATUS_RSP_SIZE			4
+#define ECDH_RSP_SIZE			(32 + CMD_OVERHEAD_SIZE)
+#define GENKEY_RSP_SIZE			(ATMEL_ECC_PUBKEY_SIZE + \
+					 CMD_OVERHEAD_SIZE)
+#define READ_RSP_SIZE			(4 + CMD_OVERHEAD_SIZE)
+#define MAX_RSP_SIZE			GENKEY_RSP_SIZE
+
+/**
+ * atmel_ecc_cmd - structure used for communicating with the device.
+ * @word_addr: indicates the function of the packet sent to the device. This
+ *             byte should have a value of COMMAND for normal operation.
+ * @count    : number of bytes to be transferred to (or from) the device.
+ * @opcode   : the command code.
+ * @param1   : the first parameter; always present.
+ * @param2   : the second parameter; always present.
+ * @data     : optional remaining input data. Includes a 2-byte CRC.
+ * @rxsize   : size of the data received from i2c client.
+ * @msecs    : command execution time in milliseconds
+ */
+struct atmel_ecc_cmd {
+	u8 word_addr;
+	u8 count;
+	u8 opcode;
+	u8 param1;
+	u16 param2;
+	u8 data[MAX_RSP_SIZE];
+	u8 msecs;
+	u16 rxsize;
+} __packed;
+
+/* Status/Error codes */
+#define STATUS_SIZE			0x04
+#define STATUS_NOERR			0x00
+#define STATUS_WAKE_SUCCESSFUL		0x11
+
+static const struct {
+	u8 value;
+	const char *error_text;
+} error_list[] = {
+	{ 0x01, "CheckMac or Verify miscompare" },
+	{ 0x03, "Parse Error" },
+	{ 0x05, "ECC Fault" },
+	{ 0x0F, "Execution Error" },
+	{ 0xEE, "Watchdog about to expire" },
+	{ 0xFF, "CRC or other communication error" },
+};
+
+/* Definitions for eeprom organization */
+#define CONFIG_ZONE			0
+
+/* Definitions for Indexes common to all commands */
+#define RSP_DATA_IDX			1 /* buffer index of data in response */
+#define DATA_SLOT_2			2 /* used for ECDH private key */
+
+/* Definitions for the device lock state */
+#define DEVICE_LOCK_ADDR		0x15
+#define LOCK_VALUE_IDX			(RSP_DATA_IDX + 2)
+#define LOCK_CONFIG_IDX			(RSP_DATA_IDX + 3)
+
+/*
+ * Wake High delay to data communication (microseconds). SDA should be stable
+ * high for this entire duration.
+ */
+#define TWHI_MIN			1500
+#define TWHI_MAX			1550
+
+/* Wake Low duration */
+#define TWLO_USEC			60
+
+/* Command execution time (milliseconds) */
+#define MAX_EXEC_TIME_ECDH		58
+#define MAX_EXEC_TIME_GENKEY		115
+#define MAX_EXEC_TIME_READ		1
+
+/* Command opcode */
+#define OPCODE_ECDH			0x43
+#define OPCODE_GENKEY			0x40
+#define OPCODE_READ			0x02
+
+/* Definitions for the READ Command */
+#define READ_COUNT			7
+
+/* Definitions for the GenKey Command */
+#define GENKEY_COUNT			7
+#define GENKEY_MODE_PRIVATE		0x04
+
+/* Definitions for the ECDH Command */
+#define ECDH_COUNT			71
+#define ECDH_PREFIX_MODE		0x00
+
+#endif /* __ATMEL_ECC_H__ */
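
To see how the size constants fit together, take the READ command built by
atmel_ecc_init_read_cmd():

	/*
	 * count(1) + opcode(1) + param1(1) + param2(2) + crc(2) = READ_COUNT = 7
	 *
	 * atmel_ecc_checksum() CRCs everything from count up to, but not
	 * including, the CRC itself (cmd->count - CRC_SIZE = 5 bytes) and
	 * stores the 16-bit result right after those bytes. On the wire,
	 * atmel_ecc_send_receive() transmits cmd->count + WORD_ADDR_SIZE = 8
	 * bytes, the extra byte being the leading word_addr.
	 */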

+ 1 - 1
drivers/crypto/atmel-sha.c

@@ -2883,7 +2883,7 @@ sha_dd_err:
 
 static int atmel_sha_remove(struct platform_device *pdev)
 {
-	static struct atmel_sha_dev *sha_dd;
+	struct atmel_sha_dev *sha_dd;
 
 	sha_dd = platform_get_drvdata(pdev);
 	if (!sha_dd)

+ 1 - 1
drivers/crypto/atmel-tdes.c

@@ -1487,7 +1487,7 @@ tdes_dd_err:
 
 static int atmel_tdes_remove(struct platform_device *pdev)
 {
-	static struct atmel_tdes_dev *tdes_dd;
+	struct atmel_tdes_dev *tdes_dd;
 
 	tdes_dd = platform_get_drvdata(pdev);
 	if (!tdes_dd)

+ 1 - 0
drivers/crypto/axis/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) := artpec6_crypto.o

+ 3192 - 0
drivers/crypto/axis/artpec6_crypto.c

@@ -0,0 +1,3192 @@
+/*
+ *   Driver for ARTPEC-6 crypto block using the kernel asynchronous crypto api.
+ *
+ *    Copyright (C) 2014-2017  Axis Communications AB
+ */
+#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
+
+#include <linux/bitfield.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/fault-inject.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include <crypto/aes.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/sha.h>
+#include <crypto/xts.h>
+
+/* Max length of a line in all cache levels for Artpec SoCs. */
+#define ARTPEC_CACHE_LINE_MAX	32
+
+#define PDMA_OUT_CFG		0x0000
+#define PDMA_OUT_BUF_CFG	0x0004
+#define PDMA_OUT_CMD		0x0008
+#define PDMA_OUT_DESCRQ_PUSH	0x0010
+#define PDMA_OUT_DESCRQ_STAT	0x0014
+
+#define A6_PDMA_IN_CFG		0x0028
+#define A6_PDMA_IN_BUF_CFG	0x002c
+#define A6_PDMA_IN_CMD		0x0030
+#define A6_PDMA_IN_STATQ_PUSH	0x0038
+#define A6_PDMA_IN_DESCRQ_PUSH	0x0044
+#define A6_PDMA_IN_DESCRQ_STAT	0x0048
+#define A6_PDMA_INTR_MASK	0x0068
+#define A6_PDMA_ACK_INTR	0x006c
+#define A6_PDMA_MASKED_INTR	0x0074
+
+#define A7_PDMA_IN_CFG		0x002c
+#define A7_PDMA_IN_BUF_CFG	0x0030
+#define A7_PDMA_IN_CMD		0x0034
+#define A7_PDMA_IN_STATQ_PUSH	0x003c
+#define A7_PDMA_IN_DESCRQ_PUSH	0x0048
+#define A7_PDMA_IN_DESCRQ_STAT	0x004C
+#define A7_PDMA_INTR_MASK	0x006c
+#define A7_PDMA_ACK_INTR	0x0070
+#define A7_PDMA_MASKED_INTR	0x0078
+
+#define PDMA_OUT_CFG_EN				BIT(0)
+
+#define PDMA_OUT_BUF_CFG_DATA_BUF_SIZE		GENMASK(4, 0)
+#define PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE		GENMASK(9, 5)
+
+#define PDMA_OUT_CMD_START			BIT(0)
+#define A6_PDMA_OUT_CMD_STOP			BIT(3)
+#define A7_PDMA_OUT_CMD_STOP			BIT(2)
+
+#define PDMA_OUT_DESCRQ_PUSH_LEN		GENMASK(5, 0)
+#define PDMA_OUT_DESCRQ_PUSH_ADDR		GENMASK(31, 6)
+
+#define PDMA_OUT_DESCRQ_STAT_LEVEL		GENMASK(3, 0)
+#define PDMA_OUT_DESCRQ_STAT_SIZE		GENMASK(7, 4)
+
+#define PDMA_IN_CFG_EN				BIT(0)
+
+#define PDMA_IN_BUF_CFG_DATA_BUF_SIZE		GENMASK(4, 0)
+#define PDMA_IN_BUF_CFG_DESCR_BUF_SIZE		GENMASK(9, 5)
+#define PDMA_IN_BUF_CFG_STAT_BUF_SIZE		GENMASK(14, 10)
+
+#define PDMA_IN_CMD_START			BIT(0)
+#define A6_PDMA_IN_CMD_FLUSH_STAT		BIT(2)
+#define A6_PDMA_IN_CMD_STOP			BIT(3)
+#define A7_PDMA_IN_CMD_FLUSH_STAT		BIT(1)
+#define A7_PDMA_IN_CMD_STOP			BIT(2)
+
+#define PDMA_IN_STATQ_PUSH_LEN			GENMASK(5, 0)
+#define PDMA_IN_STATQ_PUSH_ADDR			GENMASK(31, 6)
+
+#define PDMA_IN_DESCRQ_PUSH_LEN			GENMASK(5, 0)
+#define PDMA_IN_DESCRQ_PUSH_ADDR		GENMASK(31, 6)
+
+#define PDMA_IN_DESCRQ_STAT_LEVEL		GENMASK(3, 0)
+#define PDMA_IN_DESCRQ_STAT_SIZE		GENMASK(7, 4)
+
+#define A6_PDMA_INTR_MASK_IN_DATA		BIT(2)
+#define A6_PDMA_INTR_MASK_IN_EOP		BIT(3)
+#define A6_PDMA_INTR_MASK_IN_EOP_FLUSH		BIT(4)
+
+#define A7_PDMA_INTR_MASK_IN_DATA		BIT(3)
+#define A7_PDMA_INTR_MASK_IN_EOP		BIT(4)
+#define A7_PDMA_INTR_MASK_IN_EOP_FLUSH		BIT(5)
+
+#define A6_CRY_MD_OPER		GENMASK(19, 16)
+
+#define A6_CRY_MD_HASH_SEL_CTX	GENMASK(21, 20)
+#define A6_CRY_MD_HASH_HMAC_FIN	BIT(23)
+
+#define A6_CRY_MD_CIPHER_LEN	GENMASK(21, 20)
+#define A6_CRY_MD_CIPHER_DECR	BIT(22)
+#define A6_CRY_MD_CIPHER_TWEAK	BIT(23)
+#define A6_CRY_MD_CIPHER_DSEQ	BIT(24)
+
+#define A7_CRY_MD_OPER		GENMASK(11, 8)
+
+#define A7_CRY_MD_HASH_SEL_CTX	GENMASK(13, 12)
+#define A7_CRY_MD_HASH_HMAC_FIN	BIT(15)
+
+#define A7_CRY_MD_CIPHER_LEN	GENMASK(13, 12)
+#define A7_CRY_MD_CIPHER_DECR	BIT(14)
+#define A7_CRY_MD_CIPHER_TWEAK	BIT(15)
+#define A7_CRY_MD_CIPHER_DSEQ	BIT(16)
+
+/* DMA metadata constants */
+#define regk_crypto_aes_cbc     0x00000002
+#define regk_crypto_aes_ctr     0x00000003
+#define regk_crypto_aes_ecb     0x00000001
+#define regk_crypto_aes_gcm     0x00000004
+#define regk_crypto_aes_xts     0x00000005
+#define regk_crypto_cache       0x00000002
+#define a6_regk_crypto_dlkey    0x0000000a
+#define a7_regk_crypto_dlkey    0x0000000e
+#define regk_crypto_ext         0x00000001
+#define regk_crypto_hmac_sha1   0x00000007
+#define regk_crypto_hmac_sha256 0x00000009
+#define regk_crypto_hmac_sha384 0x0000000b
+#define regk_crypto_hmac_sha512 0x0000000d
+#define regk_crypto_init        0x00000000
+#define regk_crypto_key_128     0x00000000
+#define regk_crypto_key_192     0x00000001
+#define regk_crypto_key_256     0x00000002
+#define regk_crypto_null        0x00000000
+#define regk_crypto_sha1        0x00000006
+#define regk_crypto_sha256      0x00000008
+#define regk_crypto_sha384      0x0000000a
+#define regk_crypto_sha512      0x0000000c
+
+/* DMA descriptor structures */
+struct pdma_descr_ctrl  {
+	unsigned char short_descr : 1;
+	unsigned char pad1        : 1;
+	unsigned char eop         : 1;
+	unsigned char intr        : 1;
+	unsigned char short_len   : 3;
+	unsigned char pad2        : 1;
+} __packed;
+
+struct pdma_data_descr {
+	unsigned int len : 24;
+	unsigned int buf : 32;
+} __packed;
+
+struct pdma_short_descr {
+	unsigned char data[7];
+} __packed;
+
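+/* A grounded note on the layout above: each descriptor is one control
+ * byte followed by a 7-byte payload, either a 24-bit length plus a
+ * 32-bit bus address (full descriptor) or up to seven bytes of
+ * immediate data (short descriptor); ctrl.short_descr selects which
+ * union member the hardware interprets.
+ */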
+struct pdma_descr {
+	struct pdma_descr_ctrl ctrl;
+	union {
+		struct pdma_data_descr   data;
+		struct pdma_short_descr  shrt;
+	};
+};
+
+struct pdma_stat_descr {
+	unsigned char pad1        : 1;
+	unsigned char pad2        : 1;
+	unsigned char eop         : 1;
+	unsigned char pad3        : 5;
+	unsigned int  len         : 24;
+};
+
+/* Each descriptor array can hold max 64 entries */
+#define PDMA_DESCR_COUNT	64
+
+#define MODULE_NAME   "Artpec-6 CA"
+
+/* Hash modes (including HMAC variants) */
+#define ARTPEC6_CRYPTO_HASH_SHA1	1
+#define ARTPEC6_CRYPTO_HASH_SHA256	2
+#define ARTPEC6_CRYPTO_HASH_SHA384	3
+#define ARTPEC6_CRYPTO_HASH_SHA512	4
+
+/* Crypto modes */
+#define ARTPEC6_CRYPTO_CIPHER_AES_ECB	1
+#define ARTPEC6_CRYPTO_CIPHER_AES_CBC	2
+#define ARTPEC6_CRYPTO_CIPHER_AES_CTR	3
+#define ARTPEC6_CRYPTO_CIPHER_AES_XTS	5
+
+/* The PDMA is a DMA-engine tightly coupled with a ciphering engine.
+ * It operates on a descriptor array with up to 64 descriptor entries.
+ * The arrays must be 64 byte aligned in memory.
+ *
+ * The ciphering unit has no registers and is completely controlled by
+ * a 4-byte metadata that is inserted at the beginning of each dma packet.
+ *
+ * A dma packet is a sequence of descriptors terminated by setting the .eop
+ * field in the final descriptor of the packet.
+ *
+ * Multiple packets are used for providing context data, key data and
+ * the plain/ciphertext.
+ *
+ *   PDMA Descriptors (Array)
+ *  +------+------+------+~~+-------+------+----
+ *  |  0   |  1   |  2   |~~| 11 EOP|  12  |  ....
+ *  +--+---+--+---+----+-+~~+-------+----+-+----
+ *     |      |        |       |         |
+ *     |      |        |       |         |
+ *   __|__  +-------++-------++-------+ +----+
+ *  | MD  | |Payload||Payload||Payload| | MD |
+ *  +-----+ +-------++-------++-------+ +----+
+ */
+
+struct artpec6_crypto_bounce_buffer {
+	struct list_head list;
+	size_t length;
+	struct scatterlist *sg;
+	size_t offset;
+	/* buf is aligned to ARTPEC_CACHE_LINE_MAX and
+	 * holds up to ARTPEC_CACHE_LINE_MAX bytes of data.
+	 */
+	void *buf;
+};
+
+struct artpec6_crypto_dma_map {
+	dma_addr_t dma_addr;
+	size_t size;
+	enum dma_data_direction dir;
+};
+
+struct artpec6_crypto_dma_descriptors {
+	struct pdma_descr out[PDMA_DESCR_COUNT] __aligned(64);
+	struct pdma_descr in[PDMA_DESCR_COUNT] __aligned(64);
+	u32 stat[PDMA_DESCR_COUNT] __aligned(64);
+	struct list_head bounce_buffers;
+	/* Enough maps for all out/in buffers, and all three descr. arrays */
+	struct artpec6_crypto_dma_map maps[PDMA_DESCR_COUNT * 2 + 2];
+	dma_addr_t out_dma_addr;
+	dma_addr_t in_dma_addr;
+	dma_addr_t stat_dma_addr;
+	size_t out_cnt;
+	size_t in_cnt;
+	size_t map_count;
+};
+
+enum artpec6_crypto_variant {
+	ARTPEC6_CRYPTO,
+	ARTPEC7_CRYPTO,
+};
+
+struct artpec6_crypto {
+	void __iomem *base;
+	spinlock_t queue_lock;
+	struct list_head queue; /* waiting for pdma fifo space */
+	struct list_head pending; /* submitted to pdma fifo */
+	struct tasklet_struct task;
+	struct kmem_cache *dma_cache;
+	int pending_count;
+	struct timer_list timer;
+	enum artpec6_crypto_variant variant;
+	void *pad_buffer; /* cache-aligned block padding buffer */
+	void *zero_buffer;
+};
+
+enum artpec6_crypto_hash_flags {
+	HASH_FLAG_INIT_CTX = 2,
+	HASH_FLAG_UPDATE = 4,
+	HASH_FLAG_FINALIZE = 8,
+	HASH_FLAG_HMAC = 16,
+	HASH_FLAG_UPDATE_KEY = 32,
+};
+
+struct artpec6_crypto_req_common {
+	struct list_head list;
+	struct artpec6_crypto_dma_descriptors *dma;
+	struct crypto_async_request *req;
+	void (*complete)(struct crypto_async_request *req);
+	gfp_t gfp_flags;
+};
+
+struct artpec6_hash_request_context {
+	char partial_buffer[SHA512_BLOCK_SIZE];
+	char partial_buffer_out[SHA512_BLOCK_SIZE];
+	char key_buffer[SHA512_BLOCK_SIZE];
+	char pad_buffer[SHA512_BLOCK_SIZE + 32];
+	unsigned char digeststate[SHA512_DIGEST_SIZE];
+	size_t partial_bytes;
+	u64 digcnt;
+	u32 key_md;
+	u32 hash_md;
+	enum artpec6_crypto_hash_flags hash_flags;
+	struct artpec6_crypto_req_common common;
+};
+
+struct artpec6_hash_export_state {
+	char partial_buffer[SHA512_BLOCK_SIZE];
+	unsigned char digeststate[SHA512_DIGEST_SIZE];
+	size_t partial_bytes;
+	u64 digcnt;
+	int oper;
+	unsigned int hash_flags;
+};
+
+struct artpec6_hashalg_context {
+	char hmac_key[SHA512_BLOCK_SIZE];
+	size_t hmac_key_length;
+	struct crypto_shash *child_hash;
+};
+
+struct artpec6_crypto_request_context {
+	u32 cipher_md;
+	bool decrypt;
+	struct artpec6_crypto_req_common common;
+};
+
+struct artpec6_cryptotfm_context {
+	unsigned char aes_key[2*AES_MAX_KEY_SIZE];
+	size_t key_length;
+	u32 key_md;
+	int crypto_type;
+	struct crypto_skcipher *fallback;
+};
+
+struct artpec6_crypto_aead_hw_ctx {
+	__be64	aad_length_bits;
+	__be64  text_length_bits;
+	__u8	J0[AES_BLOCK_SIZE];
+};
+
+struct artpec6_crypto_aead_req_ctx {
+	struct artpec6_crypto_aead_hw_ctx hw_ctx;
+	u32 cipher_md;
+	bool decrypt;
+	struct artpec6_crypto_req_common common;
+	__u8 decryption_tag[AES_BLOCK_SIZE] ____cacheline_aligned;
+};
+
+/* The crypto framework makes it hard to avoid this global. */
+static struct device *artpec6_crypto_dev;
+
+static struct dentry *dbgfs_root;
+
+#ifdef CONFIG_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(artpec6_crypto_fail_status_read);
+static DECLARE_FAULT_ATTR(artpec6_crypto_fail_dma_array_full);
+#endif
+
+enum {
+	ARTPEC6_CRYPTO_PREPARE_HASH_NO_START,
+	ARTPEC6_CRYPTO_PREPARE_HASH_START,
+};
+
+static int artpec6_crypto_prepare_aead(struct aead_request *areq);
+static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq);
+static int artpec6_crypto_prepare_hash(struct ahash_request *areq);
+
+static void
+artpec6_crypto_complete_crypto(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_aead(struct crypto_async_request *req);
+static void
+artpec6_crypto_complete_hash(struct crypto_async_request *req);
+
+static int
+artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common);
+
+static void
+artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common);
+
+struct artpec6_crypto_walk {
+	struct scatterlist *sg;
+	size_t offset;
+};
+
+static void artpec6_crypto_walk_init(struct artpec6_crypto_walk *awalk,
+				     struct scatterlist *sg)
+{
+	awalk->sg = sg;
+	awalk->offset = 0;
+}
+
+static size_t artpec6_crypto_walk_advance(struct artpec6_crypto_walk *awalk,
+					  size_t nbytes)
+{
+	while (nbytes && awalk->sg) {
+		size_t piece;
+
+		WARN_ON(awalk->offset > awalk->sg->length);
+
+		piece = min(nbytes, (size_t)awalk->sg->length - awalk->offset);
+		nbytes -= piece;
+		awalk->offset += piece;
+		if (awalk->offset == awalk->sg->length) {
+			awalk->sg = sg_next(awalk->sg);
+			awalk->offset = 0;
+		}
+	}
+
+	return nbytes;
+}
+
+static size_t
+artpec6_crypto_walk_chunklen(const struct artpec6_crypto_walk *awalk)
+{
+	WARN_ON(awalk->sg->length == awalk->offset);
+
+	return awalk->sg->length - awalk->offset;
+}
+
+static dma_addr_t
+artpec6_crypto_walk_chunk_phys(const struct artpec6_crypto_walk *awalk)
+{
+	return sg_phys(awalk->sg) + awalk->offset;
+}
+
+static void
+artpec6_crypto_copy_bounce_buffers(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct artpec6_crypto_bounce_buffer *b;
+	struct artpec6_crypto_bounce_buffer *next;
+
+	list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) {
+		pr_debug("bounce entry %p: %zu bytes @ %zu from %p\n",
+			 b, b->length, b->offset, b->buf);
+		sg_pcopy_from_buffer(b->sg,
+				   1,
+				   b->buf,
+				   b->length,
+				   b->offset);
+
+		list_del(&b->list);
+		kfree(b);
+	}
+}
+
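+/* Heuristic back-pressure: treat the PDMA as busy once more than six
+ * jobs are in flight, so further submissions are queued in software.
+ */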
+static inline bool artpec6_crypto_busy(void)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	int fifo_count = ac->pending_count;
+
+	return fifo_count > 6;
+}
+
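+/* Queue a request for the hardware. Returns -EINPROGRESS if the job
+ * was started directly, -EBUSY if it was backlogged, and -EBUSY after
+ * dropping the job when the request does not allow backlogging.
+ */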
+static int artpec6_crypto_submit(struct artpec6_crypto_req_common *req)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	int ret = -EBUSY;
+
+	spin_lock_bh(&ac->queue_lock);
+
+	if (!artpec6_crypto_busy()) {
+		list_add_tail(&req->list, &ac->pending);
+		artpec6_crypto_start_dma(req);
+		ret = -EINPROGRESS;
+	} else if (req->req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) {
+		list_add_tail(&req->list, &ac->queue);
+	} else {
+		artpec6_crypto_common_destroy(req);
+	}
+
+	spin_unlock_bh(&ac->queue_lock);
+
+	return ret;
+}
+
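+/* Push the prepared descriptor queues to the PDMA and start it. Per
+ * the PUSH register layouts above, bits 5:0 carry the queue length
+ * minus one and bits 31:6 the 64-byte aligned base address, hence the
+ * ">> 6" on the DMA addresses below.
+ */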
+static void artpec6_crypto_start_dma(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	u32 ind, statd, outd;
+
+	/* Make descriptor content visible to the DMA before starting it. */
+	wmb();
+
+	ind = FIELD_PREP(PDMA_IN_DESCRQ_PUSH_LEN, dma->in_cnt - 1) |
+	      FIELD_PREP(PDMA_IN_DESCRQ_PUSH_ADDR, dma->in_dma_addr >> 6);
+
+	statd = FIELD_PREP(PDMA_IN_STATQ_PUSH_LEN, dma->in_cnt - 1) |
+		FIELD_PREP(PDMA_IN_STATQ_PUSH_ADDR, dma->stat_dma_addr >> 6);
+
+	outd = FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_LEN, dma->out_cnt - 1) |
+	       FIELD_PREP(PDMA_OUT_DESCRQ_PUSH_ADDR, dma->out_dma_addr >> 6);
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(ind, base + A6_PDMA_IN_DESCRQ_PUSH);
+		writel_relaxed(statd, base + A6_PDMA_IN_STATQ_PUSH);
+		writel_relaxed(PDMA_IN_CMD_START, base + A6_PDMA_IN_CMD);
+	} else {
+		writel_relaxed(ind, base + A7_PDMA_IN_DESCRQ_PUSH);
+		writel_relaxed(statd, base + A7_PDMA_IN_STATQ_PUSH);
+		writel_relaxed(PDMA_IN_CMD_START, base + A7_PDMA_IN_CMD);
+	}
+
+	writel_relaxed(outd, base + PDMA_OUT_DESCRQ_PUSH);
+	writel_relaxed(PDMA_OUT_CMD_START, base + PDMA_OUT_CMD);
+
+	ac->pending_count++;
+}
+
+static void
+artpec6_crypto_init_dma_operation(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+
+	dma->out_cnt = 0;
+	dma->in_cnt = 0;
+	dma->map_count = 0;
+	INIT_LIST_HEAD(&dma->bounce_buffers);
+}
+
+static bool fault_inject_dma_descr(void)
+{
+#ifdef CONFIG_FAULT_INJECTION
+	return should_fail(&artpec6_crypto_fail_dma_array_full, 1);
+#else
+	return false;
+#endif
+}
+
+/** artpec6_crypto_setup_out_descr_phys - Setup an out channel with a
+ *                                        physical address
+ *
+ * @addr: The physical address of the data buffer
+ * @len:  The length of the data buffer
+ * @eop:  True if this is the last buffer in the packet
+ *
+ * @return 0 on success or -ENOSPC if there are no more descriptors available
+ */
+static int
+artpec6_crypto_setup_out_descr_phys(struct artpec6_crypto_req_common *common,
+				    dma_addr_t addr, size_t len, bool eop)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->out_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free OUT DMA descriptors available!\n");
+		return -ENOSPC;
+	}
+
+	d = &dma->out[dma->out_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.short_descr = 0;
+	d->ctrl.eop = eop;
+	d->data.len = len;
+	d->data.buf = addr;
+	return 0;
+}
+
+/** artpec6_crypto_setup_out_descr_short - Setup a short out descriptor
+ *
+ * @dst: The virtual address of the data
+ * @len: The length of the data, must be between 1 and 7 bytes
+ * @eop: True if this is the last buffer in the packet
+ *
+ * @return 0 on success
+ *	-ENOSPC if no more descriptors are available
+ *	-EINVAL if the data length is not between 1 and 7 bytes
+ */
+static int
+artpec6_crypto_setup_out_descr_short(struct artpec6_crypto_req_common *common,
+				     void *dst, unsigned int len, bool eop)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->out_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free OUT DMA descriptors available!\n");
+		return -ENOSPC;
+	} else if (len > 7 || len < 1) {
+		return -EINVAL;
+	}
+	d = &dma->out[dma->out_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.short_descr = 1;
+	d->ctrl.short_len = len;
+	d->ctrl.eop = eop;
+	memcpy(d->shrt.data, dst, len);
+	return 0;
+}
+
+static int artpec6_crypto_dma_map_page(struct artpec6_crypto_req_common *common,
+				      struct page *page, size_t offset,
+				      size_t size,
+				      enum dma_data_direction dir,
+				      dma_addr_t *dma_addr_out)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct device *dev = artpec6_crypto_dev;
+	struct artpec6_crypto_dma_map *map;
+	dma_addr_t dma_addr;
+
+	*dma_addr_out = 0;
+
+	if (dma->map_count >= ARRAY_SIZE(dma->maps))
+		return -ENOMEM;
+
+	dma_addr = dma_map_page(dev, page, offset, size, dir);
+	if (dma_mapping_error(dev, dma_addr))
+		return -ENOMEM;
+
+	map = &dma->maps[dma->map_count++];
+	map->size = size;
+	map->dma_addr = dma_addr;
+	map->dir = dir;
+
+	*dma_addr_out = dma_addr;
+
+	return 0;
+}
+
+static int
+artpec6_crypto_dma_map_single(struct artpec6_crypto_req_common *common,
+			      void *ptr, size_t size,
+			      enum dma_data_direction dir,
+			      dma_addr_t *dma_addr_out)
+{
+	struct page *page = virt_to_page(ptr);
+	size_t offset = (uintptr_t)ptr & ~PAGE_MASK;
+
+	return artpec6_crypto_dma_map_page(common, page, offset, size, dir,
+					  dma_addr_out);
+}
+
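+/* Map the descriptor arrays themselves so the PDMA can fetch them.
+ * Only the final stat slot is consumed by the driver, so just that
+ * entry is zeroed and mapped.
+ */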
+static int
+artpec6_crypto_dma_map_descs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	int ret;
+
+	ret = artpec6_crypto_dma_map_single(common, dma->in,
+				sizeof(dma->in[0]) * dma->in_cnt,
+				DMA_TO_DEVICE, &dma->in_dma_addr);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_dma_map_single(common, dma->out,
+				sizeof(dma->out[0]) * dma->out_cnt,
+				DMA_TO_DEVICE, &dma->out_dma_addr);
+	if (ret)
+		return ret;
+
+	/* We only read one stat descriptor */
+	dma->stat[dma->in_cnt - 1] = 0;
+
+	/*
+	 * DMA_BIDIRECTIONAL since we need our zeroing of the stat descriptor
+	 * to be written.
+	 */
+	return artpec6_crypto_dma_map_single(common,
+				dma->stat + dma->in_cnt - 1,
+				sizeof(dma->stat[0]),
+				DMA_BIDIRECTIONAL,
+				&dma->stat_dma_addr);
+}
+
+static void
+artpec6_crypto_dma_unmap_all(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct device *dev = artpec6_crypto_dev;
+	int i;
+
+	for (i = 0; i < dma->map_count; i++) {
+		struct artpec6_crypto_dma_map *map = &dma->maps[i];
+
+		dma_unmap_page(dev, map->dma_addr, map->size, map->dir);
+	}
+
+	dma->map_count = 0;
+}
+
+/** artpec6_crypto_setup_out_descr - Setup an out descriptor
+ *
+ * @dst: The virtual address of the data
+ * @len: The length of the data
+ * @eop: True if this is the last buffer in the packet
+ * @use_short: If this is true and the data length is 7 bytes or less then
+ *	a short descriptor will be used
+ *
+ * @return 0 on success
+ *	Any errors from artpec6_crypto_setup_out_descr_short() or
+ *	setup_out_descr_phys()
+ */
+static int
+artpec6_crypto_setup_out_descr(struct artpec6_crypto_req_common *common,
+			       void *dst, unsigned int len, bool eop,
+			       bool use_short)
+{
+	if (use_short && len <= 7) {
+		return artpec6_crypto_setup_out_descr_short(common, dst, len,
+							    eop);
+	} else {
+		int ret;
+		dma_addr_t dma_addr;
+
+		ret = artpec6_crypto_dma_map_single(common, dst, len,
+						   DMA_TO_DEVICE,
+						   &dma_addr);
+		if (ret)
+			return ret;
+
+		return artpec6_crypto_setup_out_descr_phys(common, dma_addr,
+							   len, eop);
+	}
+}
+
+/** artpec6_crypto_setup_in_descr_phys - Setup an in channel with a
+ *                                       physical address
+ *
+ * @addr: The physical address of the data buffer
+ * @len:  The length of the data buffer
+ * @intr: True if an interrupt should be fired after HW processing of this
+ *	  descriptor
+ *
+ * @return 0 on success or -ENOSPC if no more descriptors are available
+ */
+static int
+artpec6_crypto_setup_in_descr_phys(struct artpec6_crypto_req_common *common,
+			       dma_addr_t addr, unsigned int len, bool intr)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (dma->in_cnt >= PDMA_DESCR_COUNT ||
+	    fault_inject_dma_descr()) {
+		pr_err("No free IN DMA descriptors available!\n");
+		return -ENOSPC;
+	}
+	d = &dma->in[dma->in_cnt++];
+	memset(d, 0, sizeof(*d));
+
+	d->ctrl.intr = intr;
+	d->data.len = len;
+	d->data.buf = addr;
+	return 0;
+}
+
+/** artpec6_crypto_setup_in_descr - Setup an in channel descriptor
+ *
+ * @buffer: The virtual address of the data buffer
+ * @len:    The length of the data buffer
+ * @last:   If this is the last data buffer in the request (i.e. an interrupt
+ *	    is needed)
+ *
+ * Short descriptors are not used for the in channel
+ */
+static int
+artpec6_crypto_setup_in_descr(struct artpec6_crypto_req_common *common,
+			  void *buffer, unsigned int len, bool last)
+{
+	dma_addr_t dma_addr;
+	int ret;
+
+	ret = artpec6_crypto_dma_map_single(common, buffer, len,
+					   DMA_FROM_DEVICE, &dma_addr);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_setup_in_descr_phys(common, dma_addr, len, last);
+}
+
+static struct artpec6_crypto_bounce_buffer *
+artpec6_crypto_alloc_bounce(gfp_t flags)
+{
+	void *base;
+	size_t alloc_size = sizeof(struct artpec6_crypto_bounce_buffer) +
+			    2 * ARTPEC_CACHE_LINE_MAX;
+	struct artpec6_crypto_bounce_buffer *bbuf = kzalloc(alloc_size, flags);
+
+	if (!bbuf)
+		return NULL;
+
+	base = bbuf + 1;
+	bbuf->buf = PTR_ALIGN(base, ARTPEC_CACHE_LINE_MAX);
+	return bbuf;
+}
+
+static int setup_bounce_buffer_in(struct artpec6_crypto_req_common *common,
+				  struct artpec6_crypto_walk *walk, size_t size)
+{
+	struct artpec6_crypto_bounce_buffer *bbuf;
+	int ret;
+
+	bbuf = artpec6_crypto_alloc_bounce(common->gfp_flags);
+	if (!bbuf)
+		return -ENOMEM;
+
+	bbuf->length = size;
+	bbuf->sg = walk->sg;
+	bbuf->offset = walk->offset;
+
+	ret = artpec6_crypto_setup_in_descr(common, bbuf->buf, size, false);
+	if (ret) {
+		kfree(bbuf);
+		return ret;
+	}
+
+	pr_debug("BOUNCE %zu offset %zu\n", size, walk->offset);
+	list_add_tail(&bbuf->list, &common->dma->bounce_buffers);
+	return 0;
+}
+
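+/* Build IN (device-to-memory) descriptors for a scatterlist. Chunks
+ * that do not start on a cache-line boundary, or that are shorter
+ * than one cache line, are routed through bounce buffers; aligned
+ * full lines are DMA-mapped in place.
+ */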
+static int
+artpec6_crypto_setup_sg_descrs_in(struct artpec6_crypto_req_common *common,
+				  struct artpec6_crypto_walk *walk,
+				  size_t count)
+{
+	size_t chunk;
+	int ret;
+	dma_addr_t addr;
+
+	while (walk->sg && count) {
+		chunk = min(count, artpec6_crypto_walk_chunklen(walk));
+		addr = artpec6_crypto_walk_chunk_phys(walk);
+
+		/* When destination buffers are not aligned to the cache line
+		 * size we need bounce buffers. The DMA-API requires that the
+		 * entire cache line is owned by the DMA buffer; this also
+		 * holds when coherent DMA is used.
+		 */
+		if (!IS_ALIGNED(addr, ARTPEC_CACHE_LINE_MAX)) {
+			chunk = min_t(dma_addr_t, chunk,
+				      ALIGN(addr, ARTPEC_CACHE_LINE_MAX) -
+				      addr);
+
+			pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk);
+			ret = setup_bounce_buffer_in(common, walk, chunk);
+		} else if (chunk < ARTPEC_CACHE_LINE_MAX) {
+			pr_debug("CHUNK-b %pad:%zu\n", &addr, chunk);
+			ret = setup_bounce_buffer_in(common, walk, chunk);
+		} else {
+			dma_addr_t dma_addr;
+
+			chunk = chunk & ~(ARTPEC_CACHE_LINE_MAX-1);
+
+			pr_debug("CHUNK %pad:%zu\n", &addr, chunk);
+
+			ret = artpec6_crypto_dma_map_page(common,
+							 sg_page(walk->sg),
+							 walk->sg->offset +
+							 walk->offset,
+							 chunk,
+							 DMA_FROM_DEVICE,
+							 &dma_addr);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_in_descr_phys(common,
+								 dma_addr,
+								 chunk, false);
+		}
+
+		if (ret)
+			return ret;
+
+		count = count - chunk;
+		artpec6_crypto_walk_advance(walk, chunk);
+	}
+
+	if (count)
+		pr_err("EOL unexpected %zu bytes left\n", count);
+
+	return count ? -EINVAL : 0;
+}
+
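+/* Build OUT (memory-to-device) descriptors for a scatterlist. A
+ * leading chunk on an unaligned address is copied into a short
+ * descriptor so only 32-bit aligned bus addresses reach the DMA.
+ */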
+static int
+artpec6_crypto_setup_sg_descrs_out(struct artpec6_crypto_req_common *common,
+				   struct artpec6_crypto_walk *walk,
+				   size_t count)
+{
+	size_t chunk;
+	int ret;
+	dma_addr_t addr;
+
+	while (walk->sg && count) {
+		chunk = min(count, artpec6_crypto_walk_chunklen(walk));
+		addr = artpec6_crypto_walk_chunk_phys(walk);
+
+		pr_debug("OUT-CHUNK %pad:%zu\n", &addr, chunk);
+
+		if (addr & 3) {
+			char buf[3];
+
+			chunk = min_t(size_t, chunk, (4-(addr&3)));
+
+			sg_pcopy_to_buffer(walk->sg, 1, buf, chunk,
+					   walk->offset);
+
+			ret = artpec6_crypto_setup_out_descr_short(common, buf,
+								   chunk,
+								   false);
+		} else {
+			dma_addr_t dma_addr;
+
+			ret = artpec6_crypto_dma_map_page(common,
+							 sg_page(walk->sg),
+							 walk->sg->offset +
+							 walk->offset,
+							 chunk,
+							 DMA_TO_DEVICE,
+							 &dma_addr);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_out_descr_phys(common,
+								 dma_addr,
+								 chunk, false);
+		}
+
+		if (ret)
+			return ret;
+
+		count = count - chunk;
+		artpec6_crypto_walk_advance(walk, chunk);
+	}
+
+	if (count)
+		pr_err("EOL unexpected %zu bytes left\n", count);
+
+	return count ? -EINVAL : 0;
+}
+
+/** artpec6_crypto_terminate_out_descrs - Set the EOP on the last out descriptor
+ *
+ * If the out descriptor list is non-empty, then the eop flag on the
+ * last used out descriptor will be set.
+ *
+ * @return  0 on success
+ *	-EINVAL if the out descriptor list is empty or has overflowed
+ */
+static int
+artpec6_crypto_terminate_out_descrs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (!dma->out_cnt || dma->out_cnt > PDMA_DESCR_COUNT) {
+		pr_err("%s: OUT descriptor list is %s\n",
+			MODULE_NAME, dma->out_cnt ? "empty" : "full");
+		return -EINVAL;
+	}
+
+	d = &dma->out[dma->out_cnt-1];
+	d->ctrl.eop = 1;
+
+	return 0;
+}
+
+/** artpec6_crypto_terminate_in_descrs - Set the interrupt flag on the last
+ *                                       in descriptor
+ *
+ * See artpec6_crypto_terminate_out_descrs() for return values
+ */
+static int
+artpec6_crypto_terminate_in_descrs(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto_dma_descriptors *dma = common->dma;
+	struct pdma_descr *d;
+
+	if (!dma->in_cnt || dma->in_cnt > PDMA_DESCR_COUNT) {
+		pr_err("%s: IN descriptor list is %s\n",
+			MODULE_NAME, dma->in_cnt ? "empty" : "full");
+		return -EINVAL;
+	}
+
+	d = &dma->in[dma->in_cnt-1];
+	d->ctrl.intr = 1;
+	return 0;
+}
+
+/** create_hash_pad - Create a Secure Hash conformant pad
+ *
+ * @oper:     The hash operation, used to select the block and length sizes
+ * @dst:      The destination buffer for the pad. Must hold up to one block
+ *	      plus the length field and marker byte (at most 144 bytes)
+ * @dgstlen:  The number of message bytes hashed so far
+ * @bitcount: The total length of the hashed message in bits
+ * @return The total number of padding bytes written to @dst
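+ *
+ * Worked example: after 3 bytes of SHA-1 input, mod = 64 and
+ * target = 55, so pad_bytes = 52; the pad is the 0x80 marker,
+ * 52 zero bytes and the 8-byte big-endian bit count, which
+ * together with the 3 message bytes fills 3 + 1 + 52 + 8 = 64
+ * bytes, exactly one block.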
+ */
+static size_t
+create_hash_pad(int oper, unsigned char *dst, u64 dgstlen, u64 bitcount)
+{
+	unsigned int mod, target, diff, pad_bytes, size_bytes;
+	__be64 bits = __cpu_to_be64(bitcount);
+
+	switch (oper) {
+	case regk_crypto_sha1:
+	case regk_crypto_sha256:
+	case regk_crypto_hmac_sha1:
+	case regk_crypto_hmac_sha256:
+		target = 448 / 8;
+		mod = 512 / 8;
+		size_bytes = 8;
+		break;
+	default:
+		target = 896 / 8;
+		mod = 1024 / 8;
+		size_bytes = 16;
+		break;
+	}
+
+	target -= 1;
+	diff = dgstlen & (mod - 1);
+	pad_bytes = diff > target ? target + mod - diff : target - diff;
+
+	memset(dst + 1, 0, pad_bytes);
+	dst[0] = 0x80;
+
+	if (size_bytes == 16) {
+		memset(dst + 1 + pad_bytes, 0, 8);
+		memcpy(dst + 1 + pad_bytes + 8, &bits, 8);
+	} else {
+		memcpy(dst + 1 + pad_bytes, &bits, 8);
+	}
+
+	return pad_bytes + size_bytes + 1;
+}
+
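+/* Allocate the per-request DMA descriptor bundle from the dma_cache.
+ * The GFP flags are derived once from the request's MAY_SLEEP flag
+ * and reused for every allocation belonging to this request.
+ */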
+static int artpec6_crypto_common_init(struct artpec6_crypto_req_common *common,
+		struct crypto_async_request *parent,
+		void (*complete)(struct crypto_async_request *req),
+		struct scatterlist *dstsg, unsigned int nbytes)
+{
+	gfp_t flags;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+
+	flags = (parent->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		 GFP_KERNEL : GFP_ATOMIC;
+
+	common->gfp_flags = flags;
+	common->dma = kmem_cache_alloc(ac->dma_cache, flags);
+	if (!common->dma)
+		return -ENOMEM;
+
+	common->req = parent;
+	common->complete = complete;
+	return 0;
+}
+
+static void
+artpec6_crypto_bounce_destroy(struct artpec6_crypto_dma_descriptors *dma)
+{
+	struct artpec6_crypto_bounce_buffer *b;
+	struct artpec6_crypto_bounce_buffer *next;
+
+	list_for_each_entry_safe(b, next, &dma->bounce_buffers, list) {
+		kfree(b);
+	}
+}
+
+static int
+artpec6_crypto_common_destroy(struct artpec6_crypto_req_common *common)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+
+	artpec6_crypto_dma_unmap_all(common);
+	artpec6_crypto_bounce_destroy(common->dma);
+	kmem_cache_free(ac->dma_cache, common->dma);
+	common->dma = NULL;
+	return 0;
+}
+
+/*
+ * Ciphering functions.
+ */
+static int artpec6_crypto_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	void (*complete)(struct crypto_async_request *req);
+	int ret;
+
+	req_ctx = skcipher_request_ctx(req);
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		req_ctx->decrypt = 0;
+		break;
+	default:
+		break;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		complete = artpec6_crypto_complete_cbc_encrypt;
+		break;
+	default:
+		complete = artpec6_crypto_complete_crypto;
+		break;
+	}
+
+	ret = artpec6_crypto_common_init(&req_ctx->common,
+				  &req->base,
+				  complete,
+				  req->dst, req->cryptlen);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_crypto(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_decrypt(struct skcipher_request *req)
+{
+	int ret;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	void (*complete)(struct crypto_async_request *req);
+
+	req_ctx = skcipher_request_ctx(req);
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		req_ctx->decrypt = 1;
+		break;
+	default:
+		break;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		complete = artpec6_crypto_complete_cbc_decrypt;
+		break;
+	default:
+		complete = artpec6_crypto_complete_crypto;
+		break;
+	}
+
+	ret = artpec6_crypto_common_init(&req_ctx->common, &req->base,
+				  complete,
+				  req->dst, req->cryptlen);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_crypto(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
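+/* CTR entry point. The hardware increments only the last 32 bits of
+ * the IV, so requests whose block count would wrap that counter are
+ * handed to the software fallback skcipher instead.
+ */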
+static int
+artpec6_crypto_ctr_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	unsigned int counter = be32_to_cpup((__be32 *)
+					    (req->iv + iv_len - 4));
+	unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) /
+			     AES_BLOCK_SIZE;
+
+	/*
+	 * The hardware uses only the last 32 bits as the counter while the
+	 * kernel tests (aes_ctr_enc_tv_template[4] for example) expect that
+	 * the whole IV is a counter.  So fall back if the counter is going to
+	 * overflow.
+	 */
+	if (counter + nblks < counter) {
+		int ret;
+
+		pr_debug("counter %x will overflow (nblks %u), falling back\n",
+			 counter, counter + nblks);
+
+		ret = crypto_skcipher_setkey(ctx->fallback, ctx->aes_key,
+					     ctx->key_length);
+		if (ret)
+			return ret;
+
+		{
+			SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+
+			skcipher_request_set_tfm(subreq, ctx->fallback);
+			skcipher_request_set_callback(subreq, req->base.flags,
+						      NULL, NULL);
+			skcipher_request_set_crypt(subreq, req->src, req->dst,
+						   req->cryptlen, req->iv);
+			ret = encrypt ? crypto_skcipher_encrypt(subreq)
+				      : crypto_skcipher_decrypt(subreq);
+			skcipher_request_zero(subreq);
+		}
+		return ret;
+	}
+
+	return encrypt ? artpec6_crypto_encrypt(req)
+		       : artpec6_crypto_decrypt(req);
+}
+
+static int artpec6_crypto_ctr_encrypt(struct skcipher_request *req)
+{
+	return artpec6_crypto_ctr_crypt(req, true);
+}
+
+static int artpec6_crypto_ctr_decrypt(struct skcipher_request *req)
+{
+	return artpec6_crypto_ctr_crypt(req, false);
+}
+
+/*
+ * AEAD functions
+ */
+static int artpec6_crypto_aead_init(struct crypto_aead *tfm)
+{
+	struct artpec6_cryptotfm_context *tfm_ctx = crypto_aead_ctx(tfm);
+
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	crypto_aead_set_reqsize(tfm,
+				sizeof(struct artpec6_crypto_aead_req_ctx));
+
+	return 0;
+}
+
+static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key,
+			       unsigned int len)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(&tfm->base);
+
+	if (len != 16 && len != 24 && len != 32) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->key_length = len;
+
+	memcpy(ctx->aes_key, key, len);
+	return 0;
+}
+
+static int artpec6_crypto_aead_encrypt(struct aead_request *req)
+{
+	int ret;
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req);
+
+	req_ctx->decrypt = false;
+	ret = artpec6_crypto_common_init(&req_ctx->common, &req->base,
+				  artpec6_crypto_complete_aead,
+				  NULL, 0);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_aead(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
+static int artpec6_crypto_aead_decrypt(struct aead_request *req)
+{
+	int ret;
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(req);
+
+	req_ctx->decrypt = true;
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	ret = artpec6_crypto_common_init(&req_ctx->common,
+				  &req->base,
+				  artpec6_crypto_complete_aead,
+				  NULL, 0);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_prepare_aead(req);
+	if (ret) {
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		return ret;
+	}
+
+	return artpec6_crypto_submit(&req_ctx->common);
+}
+
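+/* Build the DMA job for one hash round. The OUT list carries, in
+ * order: an optional HMAC key packet, the metadata word, an optionally
+ * restored context, the data blocks and, when finalizing, the hash
+ * pad; the IN side collects the updated context or the final digest.
+ * Returns ARTPEC6_CRYPTO_PREPARE_HASH_START when a job was built,
+ * ARTPEC6_CRYPTO_PREPARE_HASH_NO_START when there is nothing for the
+ * HW to do yet, or a negative error.
+ */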
+static int artpec6_crypto_prepare_hash(struct ahash_request *areq)
+{
+	struct artpec6_hashalg_context *ctx = crypto_tfm_ctx(areq->base.tfm);
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(areq);
+	size_t digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(areq));
+	size_t contextsize = digestsize == SHA384_DIGEST_SIZE ?
+		SHA512_DIGEST_SIZE : digestsize;
+	size_t blocksize = crypto_tfm_alg_blocksize(
+		crypto_ahash_tfm(crypto_ahash_reqtfm(areq)));
+	struct artpec6_crypto_req_common *common = &req_ctx->common;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	u32 sel_ctx;
+	bool ext_ctx = false;
+	bool run_hw = false;
+	int error = 0;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	/* Upload the HMAC key; it must be the first packet */
+	if (req_ctx->hash_flags & HASH_FLAG_HMAC) {
+		if (variant == ARTPEC6_CRYPTO) {
+			req_ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER,
+						     a6_regk_crypto_dlkey);
+		} else {
+			req_ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER,
+						     a7_regk_crypto_dlkey);
+		}
+
+		/* Copy and pad up the key */
+		memcpy(req_ctx->key_buffer, ctx->hmac_key,
+		       ctx->hmac_key_length);
+		memset(req_ctx->key_buffer + ctx->hmac_key_length, 0,
+		       blocksize - ctx->hmac_key_length);
+
+		error = artpec6_crypto_setup_out_descr(common,
+					(void *)&req_ctx->key_md,
+					sizeof(req_ctx->key_md), false, false);
+		if (error)
+			return error;
+
+		error = artpec6_crypto_setup_out_descr(common,
+					req_ctx->key_buffer, blocksize,
+					true, false);
+		if (error)
+			return error;
+	}
+
+	if (!(req_ctx->hash_flags & HASH_FLAG_INIT_CTX)) {
+		/* Restore context */
+		sel_ctx = regk_crypto_ext;
+		ext_ctx = true;
+	} else {
+		sel_ctx = regk_crypto_init;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->hash_md &= ~A6_CRY_MD_HASH_SEL_CTX;
+		req_ctx->hash_md |= FIELD_PREP(A6_CRY_MD_HASH_SEL_CTX, sel_ctx);
+
+		/* If this is the final round, set the final flag */
+		if (req_ctx->hash_flags & HASH_FLAG_FINALIZE)
+			req_ctx->hash_md |= A6_CRY_MD_HASH_HMAC_FIN;
+	} else {
+		req_ctx->hash_md &= ~A7_CRY_MD_HASH_SEL_CTX;
+		req_ctx->hash_md |= FIELD_PREP(A7_CRY_MD_HASH_SEL_CTX, sel_ctx);
+
+		/* If this is the final round, set the final flag */
+		if (req_ctx->hash_flags & HASH_FLAG_FINALIZE)
+			req_ctx->hash_md |= A7_CRY_MD_HASH_HMAC_FIN;
+	}
+
+	/* Set up the metadata descriptors */
+	error = artpec6_crypto_setup_out_descr(common,
+				(void *)&req_ctx->hash_md,
+				sizeof(req_ctx->hash_md), false, false);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (error)
+		return error;
+
+	if (ext_ctx) {
+		error = artpec6_crypto_setup_out_descr(common,
+					req_ctx->digeststate,
+					contextsize, false, false);
+
+		if (error)
+			return error;
+	}
+
+	if (req_ctx->hash_flags & HASH_FLAG_UPDATE) {
+		size_t done_bytes = 0;
+		size_t total_bytes = areq->nbytes + req_ctx->partial_bytes;
+		size_t ready_bytes = round_down(total_bytes, blocksize);
+		struct artpec6_crypto_walk walk;
+
+		run_hw = ready_bytes > 0;
+		if (req_ctx->partial_bytes && ready_bytes) {
+			/* We have a partial buffer and will send at least some
+			 * bytes to the HW. Empty this partial buffer before
+			 * tackling the SG lists.
+			 */
+			memcpy(req_ctx->partial_buffer_out,
+				req_ctx->partial_buffer,
+				req_ctx->partial_bytes);
+
+			error = artpec6_crypto_setup_out_descr(common,
+						req_ctx->partial_buffer_out,
+						req_ctx->partial_bytes,
+						false, true);
+			if (error)
+				return error;
+
+			/* Reset partial buffer */
+			done_bytes += req_ctx->partial_bytes;
+			req_ctx->partial_bytes = 0;
+		}
+
+		artpec6_crypto_walk_init(&walk, areq->src);
+
+		error = artpec6_crypto_setup_sg_descrs_out(common, &walk,
+							   ready_bytes -
+							   done_bytes);
+		if (error)
+			return error;
+
+		if (walk.sg) {
+			size_t sg_skip = ready_bytes - done_bytes;
+			size_t sg_rem = areq->nbytes - sg_skip;
+
+			sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+					   req_ctx->partial_buffer +
+					   req_ctx->partial_bytes,
+					   sg_rem, sg_skip);
+
+			req_ctx->partial_bytes += sg_rem;
+		}
+
+		req_ctx->digcnt += ready_bytes;
+		req_ctx->hash_flags &= ~(HASH_FLAG_UPDATE);
+	}
+
+	/* Finalize */
+	if (req_ctx->hash_flags & HASH_FLAG_FINALIZE) {
+		bool needtrim = contextsize != digestsize;
+		size_t hash_pad_len;
+		u64 digest_bits;
+		u32 oper;
+
+		if (variant == ARTPEC6_CRYPTO)
+			oper = FIELD_GET(A6_CRY_MD_OPER, req_ctx->hash_md);
+		else
+			oper = FIELD_GET(A7_CRY_MD_OPER, req_ctx->hash_md);
+
+		/* Write out the partial buffer if present */
+		if (req_ctx->partial_bytes) {
+			memcpy(req_ctx->partial_buffer_out,
+			       req_ctx->partial_buffer,
+			       req_ctx->partial_bytes);
+			error = artpec6_crypto_setup_out_descr(common,
+						req_ctx->partial_buffer_out,
+						req_ctx->partial_bytes,
+						false, true);
+			if (error)
+				return error;
+
+			req_ctx->digcnt += req_ctx->partial_bytes;
+			req_ctx->partial_bytes = 0;
+		}
+
+		if (req_ctx->hash_flags & HASH_FLAG_HMAC)
+			digest_bits = 8 * (req_ctx->digcnt + blocksize);
+		else
+			digest_bits = 8 * req_ctx->digcnt;
+
+		/* Add the hash pad */
+		hash_pad_len = create_hash_pad(oper, req_ctx->pad_buffer,
+					       req_ctx->digcnt, digest_bits);
+		error = artpec6_crypto_setup_out_descr(common,
+						      req_ctx->pad_buffer,
+						      hash_pad_len, false,
+						      true);
+		req_ctx->digcnt = 0;
+
+		if (error)
+			return error;
+
+		/* Descriptor for the final result */
+		error = artpec6_crypto_setup_in_descr(common, areq->result,
+						      digestsize,
+						      !needtrim);
+		if (error)
+			return error;
+
+		if (needtrim) {
+			/* Discard the extra context bytes for SHA-384 */
+			error = artpec6_crypto_setup_in_descr(common,
+					req_ctx->partial_buffer,
+					digestsize - contextsize, true);
+			if (error)
+				return error;
+		}
+
+	} else { /* This is not the final operation for this request */
+		if (!run_hw)
+			return ARTPEC6_CRYPTO_PREPARE_HASH_NO_START;
+
+		/* Save the result to the context */
+		error = artpec6_crypto_setup_in_descr(common,
+						      req_ctx->digeststate,
+						      contextsize, false);
+		if (error)
+			return error;
+		/* fall through */
+	}
+
+	req_ctx->hash_flags &= ~(HASH_FLAG_INIT_CTX | HASH_FLAG_UPDATE |
+				 HASH_FLAG_FINALIZE);
+
+	error = artpec6_crypto_terminate_in_descrs(common);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_terminate_out_descrs(common);
+	if (error)
+		return error;
+
+	error = artpec6_crypto_dma_map_descs(common);
+	if (error)
+		return error;
+
+	return ARTPEC6_CRYPTO_PREPARE_HASH_START;
+}
+
+static int artpec6_crypto_aes_ecb_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_ECB;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_ctr_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(&tfm->base),
+					      0,
+					      CRYPTO_ALG_ASYNC |
+					      CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback))
+		return PTR_ERR(ctx->fallback);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CTR;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_cbc_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_CBC;
+
+	return 0;
+}
+
+static int artpec6_crypto_aes_xts_init(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	tfm->reqsize = sizeof(struct artpec6_crypto_request_context);
+	ctx->crypto_type = ARTPEC6_CRYPTO_CIPHER_AES_XTS;
+
+	return 0;
+}
+
+static void artpec6_crypto_aes_exit(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static void artpec6_crypto_aes_ctr_exit(struct crypto_skcipher *tfm)
+{
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->fallback);
+	artpec6_crypto_aes_exit(tfm);
+}
+
+static int
+artpec6_crypto_cipher_set_key(struct crypto_skcipher *cipher, const u8 *key,
+			      unsigned int keylen)
+{
+	struct artpec6_cryptotfm_context *ctx =
+		crypto_skcipher_ctx(cipher);
+
+	switch (keylen) {
+	case 16:
+	case 24:
+	case 32:
+		break;
+	default:
+		crypto_skcipher_set_flags(cipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->aes_key, key, keylen);
+	ctx->key_length = keylen;
+	return 0;
+}
+
+static int
+artpec6_crypto_xts_set_key(struct crypto_skcipher *cipher, const u8 *key,
+			      unsigned int keylen)
+{
+	struct artpec6_cryptotfm_context *ctx =
+		crypto_skcipher_ctx(cipher);
+	int ret;
+
+	ret = xts_check_key(&cipher->base, key, keylen);
+	if (ret)
+		return ret;
+
+	switch (keylen) {
+	case 32:
+	case 48:
+	case 64:
+		break;
+	default:
+		crypto_skcipher_set_flags(cipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->aes_key, key, keylen);
+	ctx->key_length = keylen;
+	return 0;
+}
+
+/** artpec6_crypto_prepare_crypto - Prepare an async block cipher crypto request
+ *
+ * @req: The asynch request to process
+ *
+ * @return 0 if the dma job was successfully prepared
+ *	  <0 on error
+ *
+ * This function sets up the PDMA descriptors for a block cipher request.
+ *
+ * The required padding is added for AES-CTR using a statically defined
+ * buffer.
+ *
+ * The PDMA descriptor list will be as follows:
+ *
+ * OUT: [KEY_MD][KEY][EOP]<CIPHER_MD>[IV]<data_0>...[data_n][AES-CTR_pad]<eop>
+ * IN:  <CIPHER_MD><data_0>...[data_n]<intr>
+ *
+ */
+static int artpec6_crypto_prepare_crypto(struct skcipher_request *areq)
+{
+	int ret;
+	struct artpec6_crypto_walk walk;
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
+	struct artpec6_cryptotfm_context *ctx = crypto_skcipher_ctx(cipher);
+	struct artpec6_crypto_request_context *req_ctx = NULL;
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	struct artpec6_crypto_req_common *common;
+	bool cipher_decr = false;
+	size_t cipher_klen;
+	u32 cipher_len = 0; /* Same as regk_crypto_key_128 for NULL crypto */
+	u32 oper;
+
+	req_ctx = skcipher_request_ctx(areq);
+	common = &req_ctx->common;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	if (variant == ARTPEC6_CRYPTO)
+		ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER, a6_regk_crypto_dlkey);
+	else
+		ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER, a7_regk_crypto_dlkey);
+
+	ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md,
+					     sizeof(ctx->key_md), false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key,
+					      ctx->key_length, true, false);
+	if (ret)
+		return ret;
+
+	req_ctx->cipher_md = 0;
+
+	if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS)
+		cipher_klen = ctx->key_length / 2;
+	else
+		cipher_klen = ctx->key_length;
+
+	/* Metadata */
+	switch (cipher_klen) {
+	case 16:
+		cipher_len = regk_crypto_key_128;
+		break;
+	case 24:
+		cipher_len = regk_crypto_key_192;
+		break;
+	case 32:
+		cipher_len = regk_crypto_key_256;
+		break;
+	default:
+		pr_err("%s: Invalid key length %d!\n",
+			MODULE_NAME, ctx->key_length);
+		return -EINVAL;
+	}
+
+	switch (ctx->crypto_type) {
+	case ARTPEC6_CRYPTO_CIPHER_AES_ECB:
+		oper = regk_crypto_aes_ecb;
+		cipher_decr = req_ctx->decrypt;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_CBC:
+		oper = regk_crypto_aes_cbc;
+		cipher_decr = req_ctx->decrypt;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_CTR:
+		oper = regk_crypto_aes_ctr;
+		cipher_decr = false;
+		break;
+
+	case ARTPEC6_CRYPTO_CIPHER_AES_XTS:
+		oper = regk_crypto_aes_xts;
+		cipher_decr = req_ctx->decrypt;
+
+		if (variant == ARTPEC6_CRYPTO)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DSEQ;
+		else
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DSEQ;
+		break;
+
+	default:
+		pr_err("%s: Invalid cipher mode %d!\n",
+			MODULE_NAME, ctx->crypto_type);
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER, oper);
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN,
+						 cipher_len);
+		if (cipher_decr)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR;
+	} else {
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER, oper);
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN,
+						 cipher_len);
+		if (cipher_decr)
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR;
+	}
+
+	ret = artpec6_crypto_setup_out_descr(common,
+					    &req_ctx->cipher_md,
+					    sizeof(req_ctx->cipher_md),
+					    false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (ret)
+		return ret;
+
+	if (iv_len) {
+		ret = artpec6_crypto_setup_out_descr(common, areq->iv, iv_len,
+						     false, false);
+		if (ret)
+			return ret;
+	}
+	/* Data out */
+	artpec6_crypto_walk_init(&walk, areq->src);
+	ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, areq->cryptlen);
+	if (ret)
+		return ret;
+
+	/* Data in */
+	artpec6_crypto_walk_init(&walk, areq->dst);
+	ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, areq->cryptlen);
+	if (ret)
+		return ret;
+
+	/* CTR-mode padding required by the HW. */
+	if (ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_CTR ||
+	    ctx->crypto_type == ARTPEC6_CRYPTO_CIPHER_AES_XTS) {
+		size_t pad = ALIGN(areq->cryptlen, AES_BLOCK_SIZE) -
+			     areq->cryptlen;
+
+		if (pad) {
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->pad_buffer,
+							     pad, false, false);
+			if (ret)
+				return ret;
+
+			ret = artpec6_crypto_setup_in_descr(common,
+							    ac->pad_buffer, pad,
+							    false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	ret = artpec6_crypto_terminate_out_descrs(common);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_terminate_in_descrs(common);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_dma_map_descs(common);
+}
+
+static int artpec6_crypto_prepare_aead(struct aead_request *areq)
+{
+	size_t count;
+	int ret;
+	size_t input_length;
+	struct artpec6_cryptotfm_context *ctx = crypto_tfm_ctx(areq->base.tfm);
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq);
+	struct crypto_aead *cipher = crypto_aead_reqtfm(areq);
+	struct artpec6_crypto_req_common *common = &req_ctx->common;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	u32 md_cipher_len;
+
+	artpec6_crypto_init_dma_operation(common);
+
+	/* Key */
+	if (variant == ARTPEC6_CRYPTO) {
+		ctx->key_md = FIELD_PREP(A6_CRY_MD_OPER,
+					 a6_regk_crypto_dlkey);
+	} else {
+		ctx->key_md = FIELD_PREP(A7_CRY_MD_OPER,
+					 a7_regk_crypto_dlkey);
+	}
+	ret = artpec6_crypto_setup_out_descr(common, (void *)&ctx->key_md,
+					     sizeof(ctx->key_md), false, false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_out_descr(common, ctx->aes_key,
+					     ctx->key_length, true, false);
+	if (ret)
+		return ret;
+
+	req_ctx->cipher_md = 0;
+
+	switch (ctx->key_length) {
+	case 16:
+		md_cipher_len = regk_crypto_key_128;
+		break;
+	case 24:
+		md_cipher_len = regk_crypto_key_192;
+		break;
+	case 32:
+		md_cipher_len = regk_crypto_key_256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO) {
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_OPER,
+						 regk_crypto_aes_gcm);
+		req_ctx->cipher_md |= FIELD_PREP(A6_CRY_MD_CIPHER_LEN,
+						 md_cipher_len);
+		if (req_ctx->decrypt)
+			req_ctx->cipher_md |= A6_CRY_MD_CIPHER_DECR;
+	} else {
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_OPER,
+						 regk_crypto_aes_gcm);
+		req_ctx->cipher_md |= FIELD_PREP(A7_CRY_MD_CIPHER_LEN,
+						 md_cipher_len);
+		if (req_ctx->decrypt)
+			req_ctx->cipher_md |= A7_CRY_MD_CIPHER_DECR;
+	}
+
+	ret = artpec6_crypto_setup_out_descr(common,
+					    (void *) &req_ctx->cipher_md,
+					    sizeof(req_ctx->cipher_md), false,
+					    false);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_setup_in_descr(common, ac->pad_buffer, 4, false);
+	if (ret)
+		return ret;
+
+	/* For the decryption, cryptlen includes the tag. */
+	input_length = areq->cryptlen;
+	if (req_ctx->decrypt)
+		input_length -= AES_BLOCK_SIZE;
+
+	/* Prepare the context buffer */
+	req_ctx->hw_ctx.aad_length_bits =
+		__cpu_to_be64(8*areq->assoclen);
+
+	req_ctx->hw_ctx.text_length_bits =
+		__cpu_to_be64(8*input_length);
+
+	memcpy(req_ctx->hw_ctx.J0, areq->iv, crypto_aead_ivsize(cipher));
+	/* The HW omits the initial increment of the counter field, so
+	 * build J0 = IV || 0^31 || 1 here: clear the counter word before
+	 * pre-incrementing it.
+	 */
+	memset(req_ctx->hw_ctx.J0 + 12, 0, 4);
+	crypto_inc(req_ctx->hw_ctx.J0 + 12, 4);
+
+	ret = artpec6_crypto_setup_out_descr(common, &req_ctx->hw_ctx,
+		sizeof(struct artpec6_crypto_aead_hw_ctx), false, false);
+	if (ret)
+		return ret;
+
+	{
+		struct artpec6_crypto_walk walk;
+
+		artpec6_crypto_walk_init(&walk, areq->src);
+
+		/* Associated data */
+		count = areq->assoclen;
+		ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count);
+		if (ret)
+			return ret;
+
+		if (!IS_ALIGNED(areq->assoclen, 16)) {
+			size_t assoc_pad = 16 - (areq->assoclen % 16);
+			/* The HW mandates zero padding here */
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->zero_buffer,
+							     assoc_pad, false,
+							     false);
+			if (ret)
+				return ret;
+		}
+
+		/* Data to crypto */
+		count = input_length;
+		ret = artpec6_crypto_setup_sg_descrs_out(common, &walk, count);
+		if (ret)
+			return ret;
+
+		if (!IS_ALIGNED(input_length, 16)) {
+			size_t crypto_pad = 16 - (input_length % 16);
+			/* The HW mandates zero padding here */
+			ret = artpec6_crypto_setup_out_descr(common,
+							     ac->zero_buffer,
+							     crypto_pad,
+							     false,
+							     false);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Data from crypto */
+	{
+		struct artpec6_crypto_walk walk;
+		size_t output_len = areq->cryptlen;
+
+		if (req_ctx->decrypt)
+			output_len -= AES_BLOCK_SIZE;
+
+		artpec6_crypto_walk_init(&walk, areq->dst);
+
+		/* skip associated data in the output */
+		count = artpec6_crypto_walk_advance(&walk, areq->assoclen);
+		if (count)
+			return -EINVAL;
+
+		count = output_len;
+		ret = artpec6_crypto_setup_sg_descrs_in(common, &walk, count);
+		if (ret)
+			return ret;
+
+		/* Put padding between the ciphertext and the auth tag */
+		if (!IS_ALIGNED(output_len, 16)) {
+			size_t crypto_pad = 16 - (output_len % 16);
+
+			ret = artpec6_crypto_setup_in_descr(common,
+							    ac->pad_buffer,
+							    crypto_pad, false);
+			if (ret)
+				return ret;
+		}
+
+		/* The authentication tag shall follow immediately after
+		 * the output ciphertext. For decryption it is put in a context
+		 * buffer for later compare against the input tag.
+		 */
+		count = AES_BLOCK_SIZE;
+
+		if (req_ctx->decrypt) {
+			ret = artpec6_crypto_setup_in_descr(common,
+				req_ctx->decryption_tag, count, false);
+			if (ret)
+				return ret;
+
+		} else {
+			ret = artpec6_crypto_setup_sg_descrs_in(common, &walk,
+								count);
+			if (ret)
+				return ret;
+		}
+	}
+
+	ret = artpec6_crypto_terminate_in_descrs(common);
+	if (ret)
+		return ret;
+
+	ret = artpec6_crypto_terminate_out_descrs(common);
+	if (ret)
+		return ret;
+
+	return artpec6_crypto_dma_map_descs(common);
+}
+
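+/* Drain the software backlog into the hardware as FIFO slots free up,
+ * and keep the status recheck timer armed while jobs remain pending.
+ */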
+static void artpec6_crypto_process_queue(struct artpec6_crypto *ac)
+{
+	struct artpec6_crypto_req_common *req;
+
+	while (!list_empty(&ac->queue) && !artpec6_crypto_busy()) {
+		req = list_first_entry(&ac->queue,
+				       struct artpec6_crypto_req_common,
+				       list);
+		list_move_tail(&req->list, &ac->pending);
+		artpec6_crypto_start_dma(req);
+
+		req->req->complete(req->req, -EINPROGRESS);
+	}
+
+	/*
+	 * In some cases, the hardware can raise an in_eop_flush interrupt
+	 * before actually updating the status, so we have a timer which will
+	 * recheck the status on timeout.  Since the cases are expected to be
+	 * very rare, we use a relatively large timeout value.  There should be
+	 * no noticeable negative effect if we time out spuriously.
+	 */
+	if (ac->pending_count)
+		mod_timer(&ac->timer, jiffies + msecs_to_jiffies(100));
+	else
+		del_timer(&ac->timer);
+}
+
+static void artpec6_crypto_timeout(unsigned long data)
+{
+	struct artpec6_crypto *ac = (struct artpec6_crypto *) data;
+
+	dev_info_ratelimited(artpec6_crypto_dev, "timeout\n");
+
+	tasklet_schedule(&ac->task);
+}
+
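+/* Completion tasklet, scheduled from the PDMA interrupt and from the
+ * recheck timer. Pending jobs are reaped in submission order; the
+ * scan stops at the first status word that is still zero.
+ */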
+static void artpec6_crypto_task(unsigned long data)
+{
+	struct artpec6_crypto *ac = (struct artpec6_crypto *)data;
+	struct artpec6_crypto_req_common *req;
+	struct artpec6_crypto_req_common *n;
+
+	if (list_empty(&ac->pending)) {
+		pr_debug("Spurious IRQ\n");
+		return;
+	}
+
+	spin_lock_bh(&ac->queue_lock);
+
+	list_for_each_entry_safe(req, n, &ac->pending, list) {
+		struct artpec6_crypto_dma_descriptors *dma = req->dma;
+		u32 stat;
+
+		dma_sync_single_for_cpu(artpec6_crypto_dev, dma->stat_dma_addr,
+					sizeof(dma->stat[0]),
+					DMA_BIDIRECTIONAL);
+
+		stat = req->dma->stat[req->dma->in_cnt-1];
+
+		/* A non-zero final status descriptor indicates
+		 * this job has finished.
+		 */
+		pr_debug("Request %p status is %X\n", req, stat);
+		if (!stat)
+			break;
+
+		/* Allow testing of timeout handling with fault injection */
+#ifdef CONFIG_FAULT_INJECTION
+		if (should_fail(&artpec6_crypto_fail_status_read, 1))
+			continue;
+#endif
+
+		pr_debug("Completing request %p\n", req);
+
+		list_del(&req->list);
+
+		artpec6_crypto_dma_unmap_all(req);
+		artpec6_crypto_copy_bounce_buffers(req);
+
+		ac->pending_count--;
+		artpec6_crypto_common_destroy(req);
+		req->complete(req->req);
+	}
+
+	artpec6_crypto_process_queue(ac);
+
+	spin_unlock_bh(&ac->queue_lock);
+}
+
+static void artpec6_crypto_complete_crypto(struct crypto_async_request *req)
+{
+	req->complete(req, 0);
+}
+
+static void
+artpec6_crypto_complete_cbc_decrypt(struct crypto_async_request *req)
+{
+	struct skcipher_request *cipher_req = container_of(req,
+		struct skcipher_request, base);
+
+	scatterwalk_map_and_copy(cipher_req->iv, cipher_req->src,
+				 cipher_req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	req->complete(req, 0);
+}
+
+static void
+artpec6_crypto_complete_cbc_encrypt(struct crypto_async_request *req)
+{
+	struct skcipher_request *cipher_req = container_of(req,
+		struct skcipher_request, base);
+
+	scatterwalk_map_and_copy(cipher_req->iv, cipher_req->dst,
+				 cipher_req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	req->complete(req, 0);
+}
+
+static void artpec6_crypto_complete_aead(struct crypto_async_request *req)
+{
+	int result = 0;
+
+	/* Verify GCM hashtag. */
+	struct aead_request *areq = container_of(req,
+		struct aead_request, base);
+	struct artpec6_crypto_aead_req_ctx *req_ctx = aead_request_ctx(areq);
+
+	if (req_ctx->decrypt) {
+		u8 input_tag[AES_BLOCK_SIZE];
+
+		sg_pcopy_to_buffer(areq->src,
+				   sg_nents(areq->src),
+				   input_tag,
+				   AES_BLOCK_SIZE,
+				   areq->assoclen + areq->cryptlen -
+				   AES_BLOCK_SIZE);
+
+		if (memcmp(req_ctx->decryption_tag,
+			   input_tag,
+			   AES_BLOCK_SIZE)) {
+			pr_debug("***EBADMSG:\n");
+			print_hex_dump_debug("ref:", DUMP_PREFIX_ADDRESS, 32, 1,
+					     input_tag, AES_BLOCK_SIZE, true);
+			print_hex_dump_debug("out:", DUMP_PREFIX_ADDRESS, 32, 1,
+					     req_ctx->decryption_tag,
+					     AES_BLOCK_SIZE, true);
+
+			result = -EBADMSG;
+		}
+	}
+
+	req->complete(req, result);
+}
+
+static void artpec6_crypto_complete_hash(struct crypto_async_request *req)
+{
+	req->complete(req, 0);
+}
+
+/*
+ * Hash functions.
+ */
+static int
+artpec6_crypto_hash_set_key(struct crypto_ahash *tfm,
+		    const u8 *key, unsigned int keylen)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(&tfm->base);
+	size_t blocksize;
+	int ret;
+
+	if (!keylen) {
+		pr_err("Invalid length (%d) of HMAC key\n",
+			keylen);
+		return -EINVAL;
+	}
+
+	memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key));
+
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	if (keylen > blocksize) {
+		SHASH_DESC_ON_STACK(hdesc, tfm_ctx->child_hash);
+
+		hdesc->tfm = tfm_ctx->child_hash;
+		hdesc->flags = crypto_ahash_get_flags(tfm) &
+			       CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		tfm_ctx->hmac_key_length = blocksize;
+		ret = crypto_shash_digest(hdesc, key, keylen,
+					  tfm_ctx->hmac_key);
+		if (ret)
+			return ret;
+
+	} else {
+		memcpy(tfm_ctx->hmac_key, key, keylen);
+		tfm_ctx->hmac_key_length = keylen;
+	}
+
+	return 0;
+}
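
This setkey is reached through the generic ahash API; as in HMAC (RFC 2104), keys longer than the block size are first digested via the fallback shash. A minimal caller sketch, with error handling abbreviated and the function name made up; an async driver like this one may return -EINPROGRESS from the digest call rather than completing synchronously:

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int hmac_sha256_oneshot(const u8 *key, unsigned int keylen,
			       const u8 *msg, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	int ret;

	tfm = crypto_alloc_ahash("hmac(sha256)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* Dispatches to the driver's setkey when this driver is selected. */
	ret = crypto_ahash_setkey(tfm, key, keylen);
	if (ret)
		goto out_free_tfm;

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, msg, len);
	ahash_request_set_callback(req, 0, NULL, NULL);
	ahash_request_set_crypt(req, &sg, out, len);

	ret = crypto_ahash_digest(req);	/* may be -EINPROGRESS when async */

	ahash_request_free(req);
out_free_tfm:
	crypto_free_ahash(tfm);
	return ret;
}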
+
+static int
+artpec6_crypto_init_hash(struct ahash_request *req, u8 type, int hmac)
+{
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+	u32 oper;
+
+	memset(req_ctx, 0, sizeof(*req_ctx));
+
+	req_ctx->hash_flags = HASH_FLAG_INIT_CTX;
+	if (hmac)
+		req_ctx->hash_flags |= (HASH_FLAG_HMAC | HASH_FLAG_UPDATE_KEY);
+
+	switch (type) {
+	case ARTPEC6_CRYPTO_HASH_SHA1:
+		oper = hmac ? regk_crypto_hmac_sha1 : regk_crypto_sha1;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA256:
+		oper = hmac ? regk_crypto_hmac_sha256 : regk_crypto_sha256;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA384:
+		oper = hmac ? regk_crypto_hmac_sha384 : regk_crypto_sha384;
+		break;
+	case ARTPEC6_CRYPTO_HASH_SHA512:
+		oper = hmac ? regk_crypto_hmac_sha512 : regk_crypto_sha512;
+		break;
+
+	default:
+		pr_err("%s: Unsupported hash type 0x%x\n", MODULE_NAME, type);
+		return -EINVAL;
+	}
+
+	if (variant == ARTPEC6_CRYPTO)
+		req_ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, oper);
+	else
+		req_ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, oper);
+
+	return 0;
+}
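
The A6 and A7 variants place the OPER field at different bit positions, hence the two FIELD_PREP() masks above. FIELD_PREP() from <linux/bitfield.h> shifts a value into the field described by a mask, and FIELD_GET() extracts it again. A toy round trip with a made-up mask, not the real register layout:

#include <linux/bitfield.h>
#include <linux/bitops.h>	/* GENMASK() */

#define DEMO_MD_OPER	GENMASK(7, 4)	/* hypothetical 4-bit field */

static u32 demo_pack_oper(u32 oper)
{
	return FIELD_PREP(DEMO_MD_OPER, oper);	/* (oper << 4) & mask */
}

static u32 demo_unpack_oper(u32 md)
{
	return FIELD_GET(DEMO_MD_OPER, md);	/* (md & mask) >> 4 */
}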
+
+static int artpec6_crypto_prepare_submit_hash(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+	int ret;
+
+	if (!req_ctx->common.dma) {
+		ret = artpec6_crypto_common_init(&req_ctx->common,
+					  &req->base,
+					  artpec6_crypto_complete_hash,
+					  NULL, 0);
+
+		if (ret)
+			return ret;
+	}
+
+	ret = artpec6_crypto_prepare_hash(req);
+	switch (ret) {
+	case ARTPEC6_CRYPTO_PREPARE_HASH_START:
+		ret = artpec6_crypto_submit(&req_ctx->common);
+		break;
+
+	case ARTPEC6_CRYPTO_PREPARE_HASH_NO_START:
+		ret = 0;
+		/* Fallthrough */
+
+	default:
+		artpec6_crypto_common_destroy(&req_ctx->common);
+		break;
+	}
+
+	return ret;
+}
+
+static int artpec6_crypto_hash_final(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	req_ctx->hash_flags |= HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hash_update(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha1_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0);
+}
+
+static int artpec6_crypto_sha1_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA1, 0);
+
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha256_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0);
+}
+
+static int artpec6_crypto_sha256_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int __maybe_unused artpec6_crypto_sha384_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0);
+}
+
+static int __maybe_unused
+artpec6_crypto_sha384_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_sha512_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0);
+}
+
+static int artpec6_crypto_sha512_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 0);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hmac_sha256_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1);
+}
+
+static int __maybe_unused
+artpec6_crypto_hmac_sha384_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1);
+}
+
+static int artpec6_crypto_hmac_sha512_init(struct ahash_request *req)
+{
+	return artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1);
+}
+
+static int artpec6_crypto_hmac_sha256_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA256, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int __maybe_unused
+artpec6_crypto_hmac_sha384_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA384, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_hmac_sha512_digest(struct ahash_request *req)
+{
+	struct artpec6_hash_request_context *req_ctx = ahash_request_ctx(req);
+
+	artpec6_crypto_init_hash(req, ARTPEC6_CRYPTO_HASH_SHA512, 1);
+	req_ctx->hash_flags |= HASH_FLAG_UPDATE | HASH_FLAG_FINALIZE;
+
+	return artpec6_crypto_prepare_submit_hash(req);
+}
+
+static int artpec6_crypto_ahash_init_common(struct crypto_tfm *tfm,
+				    const char *base_hash_name)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct artpec6_hash_request_context));
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+
+	if (base_hash_name) {
+		struct crypto_shash *child;
+
+		child = crypto_alloc_shash(base_hash_name, 0,
+					   CRYPTO_ALG_NEED_FALLBACK);
+
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		tfm_ctx->child_hash = child;
+	}
+
+	return 0;
+}
+
+static int artpec6_crypto_ahash_init(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, NULL);
+}
+
+static int artpec6_crypto_ahash_init_hmac_sha256(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha256");
+}
+
+static int __maybe_unused
+artpec6_crypto_ahash_init_hmac_sha384(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha384");
+}
+
+static int artpec6_crypto_ahash_init_hmac_sha512(struct crypto_tfm *tfm)
+{
+	return artpec6_crypto_ahash_init_common(tfm, "sha512");
+}
+
+static void artpec6_crypto_ahash_exit(struct crypto_tfm *tfm)
+{
+	struct artpec6_hashalg_context *tfm_ctx = crypto_tfm_ctx(tfm);
+
+	if (tfm_ctx->child_hash)
+		crypto_free_shash(tfm_ctx->child_hash);
+
+	memset(tfm_ctx->hmac_key, 0, sizeof(tfm_ctx->hmac_key));
+	tfm_ctx->hmac_key_length = 0;
+}
+
+static int artpec6_crypto_hash_export(struct ahash_request *req, void *out)
+{
+	const struct artpec6_hash_request_context *ctx = ahash_request_ctx(req);
+	struct artpec6_hash_export_state *state = out;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+
+	BUILD_BUG_ON(sizeof(state->partial_buffer) !=
+		     sizeof(ctx->partial_buffer));
+	BUILD_BUG_ON(sizeof(state->digeststate) != sizeof(ctx->digeststate));
+
+	state->digcnt = ctx->digcnt;
+	state->partial_bytes = ctx->partial_bytes;
+	state->hash_flags = ctx->hash_flags;
+
+	if (variant == ARTPEC6_CRYPTO)
+		state->oper = FIELD_GET(A6_CRY_MD_OPER, ctx->hash_md);
+	else
+		state->oper = FIELD_GET(A7_CRY_MD_OPER, ctx->hash_md);
+
+	memcpy(state->partial_buffer, ctx->partial_buffer,
+	       sizeof(state->partial_buffer));
+	memcpy(state->digeststate, ctx->digeststate,
+	       sizeof(state->digeststate));
+
+	return 0;
+}
+
+static int artpec6_crypto_hash_import(struct ahash_request *req, const void *in)
+{
+	struct artpec6_hash_request_context *ctx = ahash_request_ctx(req);
+	const struct artpec6_hash_export_state *state = in;
+	struct artpec6_crypto *ac = dev_get_drvdata(artpec6_crypto_dev);
+	enum artpec6_crypto_variant variant = ac->variant;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->digcnt = state->digcnt;
+	ctx->partial_bytes = state->partial_bytes;
+	ctx->hash_flags = state->hash_flags;
+
+	if (variant == ARTPEC6_CRYPTO)
+		ctx->hash_md = FIELD_PREP(A6_CRY_MD_OPER, state->oper);
+	else
+		ctx->hash_md = FIELD_PREP(A7_CRY_MD_OPER, state->oper);
+
+	memcpy(ctx->partial_buffer, state->partial_buffer,
+	       sizeof(state->partial_buffer));
+	memcpy(ctx->digeststate, state->digeststate,
+	       sizeof(state->digeststate));
+
+	return 0;
+}
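
Together, export and import let a partially hashed state be checkpointed and later resumed, which the crypto API requires of ahash implementations. A minimal usage sketch (the helper name is illustrative; the state buffer must hold crypto_ahash_statesize(tfm) bytes, i.e. sizeof(struct artpec6_hash_export_state) for this driver):

#include <crypto/hash.h>
#include <linux/errno.h>

static int checkpoint_and_resume(struct crypto_ahash *tfm,
				 struct ahash_request *req, void *state)
{
	int ret;

	/* state must hold crypto_ahash_statesize(tfm) bytes */
	ret = crypto_ahash_export(req, state);
	if (ret)
		return ret;

	/* ... req may be reinitialized, or freed and reallocated ... */

	return crypto_ahash_import(req, state);
}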
+
+static int init_crypto_hw(struct artpec6_crypto *ac)
+{
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	u32 out_descr_buf_size;
+	u32 out_data_buf_size;
+	u32 in_data_buf_size;
+	u32 in_descr_buf_size;
+	u32 in_stat_buf_size;
+	u32 in, out;
+
+	/*
+	 * The PDMA unit contains 1984 bytes of internal memory for the OUT
+	 * channels and 1024 bytes for the IN channel. This is an elastic
+	 * memory used to internally store the descriptors and data. The values
+	 * are specified in 64 byte increments. Trustzone buffers are not
+	 * used at this stage.
+	 */
+	out_data_buf_size = 16;  /* 1024 bytes for data */
+	out_descr_buf_size = 15; /* 960 bytes for descriptors */
+	in_data_buf_size = 8;    /* 512 bytes for data */
+	in_descr_buf_size = 4;   /* 256 bytes for descriptors */
+	in_stat_buf_size = 4;   /* 256 bytes for stat descrs */
+
+	BUILD_BUG_ON_MSG((out_data_buf_size
+				+ out_descr_buf_size) * 64 > 1984,
+			  "Invalid OUT configuration");
+
+	BUILD_BUG_ON_MSG((in_data_buf_size
+				+ in_descr_buf_size
+				+ in_stat_buf_size) * 64 > 1024,
+			  "Invalid IN configuration");
+
+	in = FIELD_PREP(PDMA_IN_BUF_CFG_DATA_BUF_SIZE, in_data_buf_size) |
+	     FIELD_PREP(PDMA_IN_BUF_CFG_DESCR_BUF_SIZE, in_descr_buf_size) |
+	     FIELD_PREP(PDMA_IN_BUF_CFG_STAT_BUF_SIZE, in_stat_buf_size);
+
+	out = FIELD_PREP(PDMA_OUT_BUF_CFG_DATA_BUF_SIZE, out_data_buf_size) |
+	      FIELD_PREP(PDMA_OUT_BUF_CFG_DESCR_BUF_SIZE, out_descr_buf_size);
+
+	writel_relaxed(out, base + PDMA_OUT_BUF_CFG);
+	writel_relaxed(PDMA_OUT_CFG_EN, base + PDMA_OUT_CFG);
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(in, base + A6_PDMA_IN_BUF_CFG);
+		writel_relaxed(PDMA_IN_CFG_EN, base + A6_PDMA_IN_CFG);
+		writel_relaxed(A6_PDMA_INTR_MASK_IN_DATA |
+			       A6_PDMA_INTR_MASK_IN_EOP_FLUSH,
+			       base + A6_PDMA_INTR_MASK);
+	} else {
+		writel_relaxed(in, base + A7_PDMA_IN_BUF_CFG);
+		writel_relaxed(PDMA_IN_CFG_EN, base + A7_PDMA_IN_CFG);
+		writel_relaxed(A7_PDMA_INTR_MASK_IN_DATA |
+			       A7_PDMA_INTR_MASK_IN_EOP_FLUSH,
+			       base + A7_PDMA_INTR_MASK);
+	}
+
+	return 0;
+}
+
+static void artpec6_crypto_disable_hw(struct artpec6_crypto *ac)
+{
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+
+	if (variant == ARTPEC6_CRYPTO) {
+		writel_relaxed(A6_PDMA_IN_CMD_STOP, base + A6_PDMA_IN_CMD);
+		writel_relaxed(0, base + A6_PDMA_IN_CFG);
+		writel_relaxed(A6_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD);
+	} else {
+		writel_relaxed(A7_PDMA_IN_CMD_STOP, base + A7_PDMA_IN_CMD);
+		writel_relaxed(0, base + A7_PDMA_IN_CFG);
+		writel_relaxed(A7_PDMA_OUT_CMD_STOP, base + PDMA_OUT_CMD);
+	}
+
+	writel_relaxed(0, base + PDMA_OUT_CFG);
+
+}
+
+static irqreturn_t artpec6_crypto_irq(int irq, void *dev_id)
+{
+	struct artpec6_crypto *ac = dev_id;
+	enum artpec6_crypto_variant variant = ac->variant;
+	void __iomem *base = ac->base;
+	u32 mask_in_data, mask_in_eop_flush;
+	u32 in_cmd_flush_stat, in_cmd_reg;
+	u32 ack_intr_reg;
+	u32 ack = 0;
+	u32 intr;
+
+	if (variant == ARTPEC6_CRYPTO) {
+		intr = readl_relaxed(base + A6_PDMA_MASKED_INTR);
+		mask_in_data = A6_PDMA_INTR_MASK_IN_DATA;
+		mask_in_eop_flush = A6_PDMA_INTR_MASK_IN_EOP_FLUSH;
+		in_cmd_flush_stat = A6_PDMA_IN_CMD_FLUSH_STAT;
+		in_cmd_reg = A6_PDMA_IN_CMD;
+		ack_intr_reg = A6_PDMA_ACK_INTR;
+	} else {
+		intr = readl_relaxed(base + A7_PDMA_MASKED_INTR);
+		mask_in_data = A7_PDMA_INTR_MASK_IN_DATA;
+		mask_in_eop_flush = A7_PDMA_INTR_MASK_IN_EOP_FLUSH;
+		in_cmd_flush_stat = A7_PDMA_IN_CMD_FLUSH_STAT;
+		in_cmd_reg = A7_PDMA_IN_CMD;
+		ack_intr_reg = A7_PDMA_ACK_INTR;
+	}
+
+	/* We get two interrupt notifications from each job.
+	 * The in_data means all data was sent to memory and then
+	 * we request a status flush command to write the per-job
+	 * status to its status vector. This ensures that the
+	 * tasklet can detect exactly how many of the
+	 * submitted jobs have finished.
+	 */
+	if (intr & mask_in_data)
+		ack |= mask_in_data;
+
+	if (intr & mask_in_eop_flush)
+		ack |= mask_in_eop_flush;
+	else
+		writel_relaxed(in_cmd_flush_stat, base + in_cmd_reg);
+
+	writel_relaxed(ack, base + ack_intr_reg);
+
+	if (intr & mask_in_eop_flush)
+		tasklet_schedule(&ac->task);
+
+	return IRQ_HANDLED;
+}
+
+/*------------------- Algorithm definitions ----------------------------------*/
+
+/* Hashes */
+static struct ahash_alg hash_algos[] = {
+	/* SHA-1 */
+	{
+		.init = artpec6_crypto_sha1_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha1_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA1_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha1",
+			.cra_driver_name = "artpec-sha1",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA1_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* SHA-256 */
+	{
+		.init = artpec6_crypto_sha256_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha256_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA256_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha256",
+			.cra_driver_name = "artpec-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-256 */
+	{
+		.init = artpec6_crypto_hmac_sha256_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha256_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA256_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha256)",
+			.cra_driver_name = "artpec-hmac-sha256",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha256,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+};
+
+static struct ahash_alg artpec7_hash_algos[] = {
+	/* SHA-384 */
+	{
+		.init = artpec6_crypto_sha384_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha384_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA384_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha384",
+			.cra_driver_name = "artpec-sha384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-384 */
+	{
+		.init = artpec6_crypto_hmac_sha384_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha384_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA384_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha384)",
+			.cra_driver_name = "artpec-hmac-sha384",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha384,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* SHA-512 */
+	{
+		.init = artpec6_crypto_sha512_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_sha512_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.halg.digestsize = SHA512_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "sha512",
+			.cra_driver_name = "artpec-sha512",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+	/* HMAC SHA-512 */
+	{
+		.init = artpec6_crypto_hmac_sha512_init,
+		.update = artpec6_crypto_hash_update,
+		.final = artpec6_crypto_hash_final,
+		.digest = artpec6_crypto_hmac_sha512_digest,
+		.import = artpec6_crypto_hash_import,
+		.export = artpec6_crypto_hash_export,
+		.setkey = artpec6_crypto_hash_set_key,
+		.halg.digestsize = SHA512_DIGEST_SIZE,
+		.halg.statesize = sizeof(struct artpec6_hash_export_state),
+		.halg.base = {
+			.cra_name = "hmac(sha512)",
+			.cra_driver_name = "artpec-hmac-sha512",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_hashalg_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+			.cra_init = artpec6_crypto_ahash_init_hmac_sha512,
+			.cra_exit = artpec6_crypto_ahash_exit,
+		}
+	},
+};
+
+/* Crypto */
+static struct skcipher_alg crypto_algos[] = {
+	/* AES - ECB */
+	{
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "artpec6-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_ecb_init,
+		.exit = artpec6_crypto_aes_exit,
+	},
+	/* AES - CTR */
+	{
+		.base = {
+			.cra_name = "ctr(aes)",
+			.cra_driver_name = "artpec6-ctr-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_ctr_encrypt,
+		.decrypt = artpec6_crypto_ctr_decrypt,
+		.init = artpec6_crypto_aes_ctr_init,
+		.exit = artpec6_crypto_aes_ctr_exit,
+	},
+	/* AES - CBC */
+	{
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "artpec6-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = artpec6_crypto_cipher_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_cbc_init,
+		.exit = artpec6_crypto_aes_exit
+	},
+	/* AES - XTS */
+	{
+		.base = {
+			.cra_name = "xts(aes)",
+			.cra_driver_name = "artpec6-xts-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				     CRYPTO_ALG_ASYNC,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = 2*AES_MIN_KEY_SIZE,
+		.max_keysize = 2*AES_MAX_KEY_SIZE,
+		.ivsize = 16,
+		.setkey = artpec6_crypto_xts_set_key,
+		.encrypt = artpec6_crypto_encrypt,
+		.decrypt = artpec6_crypto_decrypt,
+		.init = artpec6_crypto_aes_xts_init,
+		.exit = artpec6_crypto_aes_exit,
+	},
+};
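
These entries are consumed through the generic skcipher API. An abbreviated, illustrative caller for the cbc(aes) entry above (function name made up; synchronous completion assumed for brevity, though this async driver may also return -EINPROGRESS or -EBUSY):

#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int cbc_aes_encrypt_once(const u8 *key, unsigned int keylen,
				u8 *iv, struct scatterlist *sg,
				unsigned int nbytes)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	int ret;

	tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_skcipher_setkey(tfm, key, keylen);
	if (ret)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	skcipher_request_set_callback(req, 0, NULL, NULL);
	/* In-place operation: the same scatterlist serves as src and dst. */
	skcipher_request_set_crypt(req, sg, sg, nbytes, iv);

	ret = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return ret;
}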
+
+static struct aead_alg aead_algos[] = {
+	{
+		.init   = artpec6_crypto_aead_init,
+		.setkey = artpec6_crypto_aead_set_key,
+		.encrypt = artpec6_crypto_aead_encrypt,
+		.decrypt = artpec6_crypto_aead_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+
+		.base = {
+			.cra_name = "gcm(aes)",
+			.cra_driver_name = "artpec-gcm-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct artpec6_cryptotfm_context),
+			.cra_alignmask = 3,
+			.cra_module = THIS_MODULE,
+		},
+	}
+};
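
The single AEAD entry is reached through the generic AEAD API; GCM callers must also choose an authentication tag length. An illustrative sketch with names made up, error paths abbreviated, and async completion glossed over:

#include <crypto/aead.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int gcm_aes_encrypt_once(const u8 *key, unsigned int keylen, u8 *iv,
				struct scatterlist *sg,
				unsigned int assoclen, unsigned int cryptlen)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	int ret;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_aead_setkey(tfm, key, keylen);
	if (ret)
		goto out_free_tfm;

	ret = crypto_aead_setauthsize(tfm, 16);	/* full 16-byte GCM tag */
	if (ret)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	aead_request_set_callback(req, 0, NULL, NULL);
	aead_request_set_ad(req, assoclen);
	/* sg covers the assoc data, the plaintext, and room for the tag. */
	aead_request_set_crypt(req, sg, sg, cryptlen, iv);

	ret = crypto_aead_encrypt(req);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return ret;
}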
+
+#ifdef CONFIG_DEBUG_FS
+
+struct dbgfs_u32 {
+	char *name;
+	mode_t mode;
+	u32 *flag;
+	char *desc;
+};
+
+static void artpec6_crypto_init_debugfs(void)
+{
+	dbgfs_root = debugfs_create_dir("artpec6_crypto", NULL);
+
+	if (!dbgfs_root || IS_ERR(dbgfs_root)) {
+		dbgfs_root = NULL;
+		pr_err("%s: Could not initialise debugfs!\n", MODULE_NAME);
+		return;
+	}
+
+#ifdef CONFIG_FAULT_INJECTION
+	fault_create_debugfs_attr("fail_status_read", dbgfs_root,
+				  &artpec6_crypto_fail_status_read);
+
+	fault_create_debugfs_attr("fail_dma_array_full", dbgfs_root,
+				  &artpec6_crypto_fail_dma_array_full);
+#endif
+}
+
+static void artpec6_crypto_free_debugfs(void)
+{
+	if (!dbgfs_root)
+		return;
+
+	debugfs_remove_recursive(dbgfs_root);
+	dbgfs_root = NULL;
+}
+#endif
+
+static const struct of_device_id artpec6_crypto_of_match[] = {
+	{ .compatible = "axis,artpec6-crypto", .data = (void *)ARTPEC6_CRYPTO },
+	{ .compatible = "axis,artpec7-crypto", .data = (void *)ARTPEC7_CRYPTO },
+	{}
+};
+MODULE_DEVICE_TABLE(of, artpec6_crypto_of_match);
+
+static int artpec6_crypto_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	enum artpec6_crypto_variant variant;
+	struct artpec6_crypto *ac;
+	struct device *dev = &pdev->dev;
+	void __iomem *base;
+	struct resource *res;
+	int irq;
+	int err;
+
+	if (artpec6_crypto_dev)
+		return -ENODEV;
+
+	match = of_match_node(artpec6_crypto_of_match, dev->of_node);
+	if (!match)
+		return -EINVAL;
+
+	variant = (enum artpec6_crypto_variant)match->data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+
+	ac = devm_kzalloc(&pdev->dev, sizeof(struct artpec6_crypto),
+			  GFP_KERNEL);
+	if (!ac)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ac);
+	ac->variant = variant;
+
+	spin_lock_init(&ac->queue_lock);
+	INIT_LIST_HEAD(&ac->queue);
+	INIT_LIST_HEAD(&ac->pending);
+	setup_timer(&ac->timer, artpec6_crypto_timeout, (unsigned long) ac);
+
+	ac->base = base;
+
+	ac->dma_cache = kmem_cache_create("artpec6_crypto_dma",
+		sizeof(struct artpec6_crypto_dma_descriptors),
+		64,
+		0,
+		NULL);
+	if (!ac->dma_cache)
+		return -ENOMEM;
+
+#ifdef CONFIG_DEBUG_FS
+	artpec6_crypto_init_debugfs();
+#endif
+
+	tasklet_init(&ac->task, artpec6_crypto_task,
+		     (unsigned long)ac);
+
+	ac->pad_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX,
+				      GFP_KERNEL);
+	if (!ac->pad_buffer)
+		return -ENOMEM;
+	ac->pad_buffer = PTR_ALIGN(ac->pad_buffer, ARTPEC_CACHE_LINE_MAX);
+
+	ac->zero_buffer = devm_kzalloc(&pdev->dev, 2 * ARTPEC_CACHE_LINE_MAX,
+				      GFP_KERNEL);
+	if (!ac->zero_buffer)
+		return -ENOMEM;
+	ac->zero_buffer = PTR_ALIGN(ac->zero_buffer, ARTPEC_CACHE_LINE_MAX);
+
+	err = init_crypto_hw(ac);
+	if (err)
+		goto free_cache;
+
+	err = devm_request_irq(&pdev->dev, irq, artpec6_crypto_irq, 0,
+			       "artpec6-crypto", ac);
+	if (err)
+		goto disable_hw;
+
+	artpec6_crypto_dev = &pdev->dev;
+
+	err = crypto_register_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+	if (err) {
+		dev_err(dev, "Failed to register ahashes\n");
+		goto disable_hw;
+	}
+
+	if (variant != ARTPEC6_CRYPTO) {
+		err = crypto_register_ahashes(artpec7_hash_algos,
+					      ARRAY_SIZE(artpec7_hash_algos));
+		if (err) {
+			dev_err(dev, "Failed to register ahashes\n");
+			goto unregister_ahashes;
+		}
+	}
+
+	err = crypto_register_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+	if (err) {
+		dev_err(dev, "Failed to register ciphers\n");
+		goto unregister_a7_ahashes;
+	}
+
+	err = crypto_register_aeads(aead_algos, ARRAY_SIZE(aead_algos));
+	if (err) {
+		dev_err(dev, "Failed to register aeads\n");
+		goto unregister_algs;
+	}
+
+	return 0;
+
+unregister_algs:
+	crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+unregister_a7_ahashes:
+	if (variant != ARTPEC6_CRYPTO)
+		crypto_unregister_ahashes(artpec7_hash_algos,
+					  ARRAY_SIZE(artpec7_hash_algos));
+unregister_ahashes:
+	crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+disable_hw:
+	artpec6_crypto_disable_hw(ac);
+free_cache:
+	kmem_cache_destroy(ac->dma_cache);
+	return err;
+}
+
+static int artpec6_crypto_remove(struct platform_device *pdev)
+{
+	struct artpec6_crypto *ac = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	crypto_unregister_ahashes(hash_algos, ARRAY_SIZE(hash_algos));
+	if (ac->variant != ARTPEC6_CRYPTO)
+		crypto_unregister_ahashes(artpec7_hash_algos,
+					  ARRAY_SIZE(artpec7_hash_algos));
+	crypto_unregister_skciphers(crypto_algos, ARRAY_SIZE(crypto_algos));
+	crypto_unregister_aeads(aead_algos, ARRAY_SIZE(aead_algos));
+
+	tasklet_disable(&ac->task);
+	devm_free_irq(&pdev->dev, irq, ac);
+	tasklet_kill(&ac->task);
+	del_timer_sync(&ac->timer);
+
+	artpec6_crypto_disable_hw(ac);
+
+	kmem_cache_destroy(ac->dma_cache);
+#ifdef CONFIG_DEBUG_FS
+	artpec6_crypto_free_debugfs();
+#endif
+	return 0;
+}
+
+static struct platform_driver artpec6_crypto_driver = {
+	.probe   = artpec6_crypto_probe,
+	.remove  = artpec6_crypto_remove,
+	.driver  = {
+		.name  = "artpec6-crypto",
+		.owner = THIS_MODULE,
+		.of_match_table = artpec6_crypto_of_match,
+	},
+};
+
+module_platform_driver(artpec6_crypto_driver);
+
+MODULE_AUTHOR("Axis Communications AB");
+MODULE_DESCRIPTION("ARTPEC-6 Crypto driver");
+MODULE_LICENSE("GPL");

+ 54 - 60
drivers/crypto/bcm/cipher.c

@@ -90,8 +90,6 @@ static int aead_pri = 150;
 module_param(aead_pri, int, 0644);
 MODULE_PARM_DESC(aead_pri, "Priority for AEAD algos");
 
-#define MAX_SPUS 16
-
 /* A type 3 BCM header, expected to precede the SPU header for SPU-M.
  * Bits 3 and 4 in the first byte encode the channel number (the dma ringset).
  * 0x60 - ring 0
@@ -120,7 +118,7 @@ static u8 select_channel(void)
 {
 	u8 chan_idx = atomic_inc_return(&iproc_priv.next_chan);
 
-	return chan_idx % iproc_priv.spu.num_spu;
+	return chan_idx % iproc_priv.spu.num_chan;
 }
 
 /**
@@ -4528,8 +4526,13 @@ static void spu_functions_register(struct device *dev,
  */
 static int spu_mb_init(struct device *dev)
 {
-	struct mbox_client *mcl = &iproc_priv.mcl[iproc_priv.spu.num_spu];
-	int err;
+	struct mbox_client *mcl = &iproc_priv.mcl;
+	int err, i;
+
+	iproc_priv.mbox = devm_kcalloc(dev, iproc_priv.spu.num_chan,
+				  sizeof(struct mbox_chan *), GFP_KERNEL);
+	if (!iproc_priv.mbox)
+		return -ENOMEM;
 
 	mcl->dev = dev;
 	mcl->tx_block = false;
@@ -4538,25 +4541,33 @@ static int spu_mb_init(struct device *dev)
 	mcl->rx_callback = spu_rx_callback;
 	mcl->tx_done = NULL;
 
-	iproc_priv.mbox[iproc_priv.spu.num_spu] =
-			mbox_request_channel(mcl, 0);
-	if (IS_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu])) {
-		err = (int)PTR_ERR(iproc_priv.mbox[iproc_priv.spu.num_spu]);
-		dev_err(dev,
-			"Mbox channel %d request failed with err %d",
-			iproc_priv.spu.num_spu, err);
-		iproc_priv.mbox[iproc_priv.spu.num_spu] = NULL;
-		return err;
+	for (i = 0; i < iproc_priv.spu.num_chan; i++) {
+		iproc_priv.mbox[i] = mbox_request_channel(mcl, i);
+		if (IS_ERR(iproc_priv.mbox[i])) {
+			err = (int)PTR_ERR(iproc_priv.mbox[i]);
+			dev_err(dev,
+				"Mbox channel %d request failed with err %d",
+				i, err);
+			iproc_priv.mbox[i] = NULL;
+			goto free_channels;
+		}
 	}
 
 	return 0;
+free_channels:
+	for (i = 0; i < iproc_priv.spu.num_chan; i++) {
+		if (iproc_priv.mbox[i])
+			mbox_free_channel(iproc_priv.mbox[i]);
+	}
+
+	return err;
 }
 
 static void spu_mb_release(struct platform_device *pdev)
 {
 	int i;
 
-	for (i = 0; i < iproc_priv.spu.num_spu; i++)
+	for (i = 0; i < iproc_priv.spu.num_chan; i++)
 		mbox_free_channel(iproc_priv.mbox[i]);
 }
 
@@ -4567,7 +4578,7 @@ static void spu_counters_init(void)
 
 	atomic_set(&iproc_priv.session_count, 0);
 	atomic_set(&iproc_priv.stream_count, 0);
-	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_spu);
+	atomic_set(&iproc_priv.next_chan, (int)iproc_priv.spu.num_chan);
 	atomic64_set(&iproc_priv.bytes_in, 0);
 	atomic64_set(&iproc_priv.bytes_out, 0);
 	for (i = 0; i < SPU_OP_NUM; i++) {
@@ -4809,47 +4820,38 @@ static int spu_dt_read(struct platform_device *pdev)
 	struct resource *spu_ctrl_regs;
 	const struct of_device_id *match;
 	const struct spu_type_subtype *matched_spu_type;
-	void __iomem *spu_reg_vbase[MAX_SPUS];
-	int err;
+	struct device_node *dn = pdev->dev.of_node;
+	int err, i;
 
-	match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev);
-	matched_spu_type = match->data;
+	/* Count number of mailbox channels */
+	spu->num_chan = of_count_phandle_with_args(dn, "mboxes", "#mbox-cells");
 
-	if (iproc_priv.spu.num_spu > 1) {
-		/* If this is 2nd or later SPU, make sure it's same type */
-		if ((spu->spu_type != matched_spu_type->type) ||
-		    (spu->spu_subtype != matched_spu_type->subtype)) {
-			err = -EINVAL;
-			dev_err(&pdev->dev, "Multiple SPU types not allowed");
-			return err;
-		}
-	} else {
-		/* Record type of first SPU */
-		spu->spu_type = matched_spu_type->type;
-		spu->spu_subtype = matched_spu_type->subtype;
+	match = of_match_device(of_match_ptr(bcm_spu_dt_ids), dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Failed to match device\n");
+		return -ENODEV;
 	}
 
-	/* Get and map SPU registers */
-	spu_ctrl_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!spu_ctrl_regs) {
-		err = -EINVAL;
-		dev_err(&pdev->dev, "Invalid/missing registers for SPU\n");
-		return err;
-	}
+	matched_spu_type = match->data;
 
-	spu_reg_vbase[iproc_priv.spu.num_spu] =
-				devm_ioremap_resource(dev, spu_ctrl_regs);
-	if (IS_ERR(spu_reg_vbase[iproc_priv.spu.num_spu])) {
-		err = PTR_ERR(spu_reg_vbase[iproc_priv.spu.num_spu]);
-		dev_err(&pdev->dev, "Failed to map registers: %d\n",
-			err);
-		spu_reg_vbase[iproc_priv.spu.num_spu] = NULL;
-		return err;
-	}
+	spu->spu_type = matched_spu_type->type;
+	spu->spu_subtype = matched_spu_type->subtype;
 
-	dev_dbg(dev, "SPU %d detected.", iproc_priv.spu.num_spu);
+	for (i = 0; (i < MAX_SPUS) && ((spu_ctrl_regs =
+		platform_get_resource(pdev, IORESOURCE_MEM, i)) != NULL); i++) {
 
-	spu->reg_vbase[iproc_priv.spu.num_spu] = spu_reg_vbase;
+		spu->reg_vbase[i] = devm_ioremap_resource(dev, spu_ctrl_regs);
+		if (IS_ERR(spu->reg_vbase[i])) {
+			err = PTR_ERR(spu->reg_vbase[i]);
+			dev_err(&pdev->dev, "Failed to map registers: %d\n",
+				err);
+			spu->reg_vbase[i] = NULL;
+			return err;
+		}
+	}
+	spu->num_spu = i;
+	dev_dbg(dev, "Device has %d SPUs", spu->num_spu);
 
 	return 0;
 }
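
The num_chan value above comes straight from the device tree: of_count_phandle_with_args() counts the phandle+args tuples in the node's "mboxes" property, consulting "#mbox-cells" in each provider node to learn the tuple width. A minimal sketch (helper name illustrative):

#include <linux/of.h>

static int count_mbox_channels(struct device_node *np)
{
	int n = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");

	/* A negative value means the property is absent or malformed. */
	return n < 0 ? 0 : n;
}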
@@ -4860,8 +4862,8 @@ int bcm_spu_probe(struct platform_device *pdev)
 	struct spu_hw *spu = &iproc_priv.spu;
 	int err = 0;
 
-	iproc_priv.pdev[iproc_priv.spu.num_spu] = pdev;
-	platform_set_drvdata(iproc_priv.pdev[iproc_priv.spu.num_spu],
+	iproc_priv.pdev  = pdev;
+	platform_set_drvdata(iproc_priv.pdev,
 			     &iproc_priv);
 
 	err = spu_dt_read(pdev);
@@ -4872,12 +4874,6 @@ int bcm_spu_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto failure;
 
-	iproc_priv.spu.num_spu++;
-
-	/* If already initialized, we've just added another SPU and are done */
-	if (iproc_priv.inited)
-		return 0;
-
 	if (spu->spu_type == SPU_TYPE_SPUM)
 		iproc_priv.bcm_hdr_len = 8;
 	else if (spu->spu_type == SPU_TYPE_SPU2)
@@ -4893,8 +4889,6 @@ int bcm_spu_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto fail_reg;
 
-	iproc_priv.inited = true;
-
 	return 0;
 
 fail_reg:

+ 7 - 6
drivers/crypto/bcm/cipher.h

@@ -427,10 +427,13 @@ struct spu_hw {
 
 	/* The number of SPUs on this platform */
 	u32 num_spu;
+
+	/* The number of SPU channels on this platform */
+	u32 num_chan;
 };
 
 struct device_private {
-	struct platform_device *pdev[MAX_SPUS];
+	struct platform_device *pdev;
 
 	struct spu_hw spu;
 
@@ -470,12 +473,10 @@ struct device_private {
 	/* Number of ICV check failures for AEAD messages */
 	atomic_t bad_icv;
 
-	struct mbox_client mcl[MAX_SPUS];
-	/* Array of mailbox channel pointers, one for each channel */
-	struct mbox_chan *mbox[MAX_SPUS];
+	struct mbox_client mcl;
 
-	/* Driver initialized */
-	bool inited;
+	/* Array of mailbox channel pointers, one for each channel */
+	struct mbox_chan **mbox;
 };
 
 extern struct device_private iproc_priv;

+ 15 - 51
drivers/crypto/caam/caamalg.c

@@ -81,40 +81,6 @@
 #define debug(format, arg...)
 #endif
 
-#ifdef DEBUG
-#include <linux/highmem.h>
-
-static void dbg_dump_sg(const char *level, const char *prefix_str,
-			int prefix_type, int rowsize, int groupsize,
-			struct scatterlist *sg, size_t tlen, bool ascii)
-{
-	struct scatterlist *it;
-	void *it_page;
-	size_t len;
-	void *buf;
-
-	for (it = sg; it != NULL && tlen > 0 ; it = sg_next(sg)) {
-		/*
-		 * make sure the scatterlist's page
-		 * has a valid virtual memory mapping
-		 */
-		it_page = kmap_atomic(sg_page(it));
-		if (unlikely(!it_page)) {
-			printk(KERN_ERR "dbg_dump_sg: kmap failed\n");
-			return;
-		}
-
-		buf = it_page + it->offset;
-		len = min_t(size_t, tlen, it->length);
-		print_hex_dump(level, prefix_str, prefix_type, rowsize,
-			       groupsize, buf, len, ascii);
-		tlen -= len;
-
-		kunmap_atomic(it_page);
-	}
-}
-#endif
-
 static struct list_head alg_list;
 
 struct caam_alg_entry {
@@ -898,10 +864,10 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 	ablkcipher_unmap(jrdev, edesc, req);
 
@@ -937,10 +903,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 	ablkcipher_unmap(jrdev, edesc, req);
 
@@ -1107,10 +1073,10 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 		       ivsize, 1);
 	pr_err("asked=%d, nbytes%d\n",
 	       (int)edesc->src_nents > 1 ? 100 : req->nbytes, req->nbytes);
-	dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
@@ -1164,10 +1130,10 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
 	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 #endif
+	caam_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     edesc->src_nents > 1 ? 100 : req->nbytes, 1);
 
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
@@ -1449,11 +1415,9 @@ static int aead_decrypt(struct aead_request *req)
 	u32 *desc;
 	int ret = 0;
 
-#ifdef DEBUG
-	dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-		    req->assoclen + req->cryptlen, 1);
-#endif
+	caam_dump_sg(KERN_ERR, "dec src@" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		     req->assoclen + req->cryptlen, 1);
 
 	/* allocate extended descriptor */
 	edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,

+ 2 - 3
drivers/crypto/caam/caamalg_desc.c

@@ -599,7 +599,7 @@ void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
 
 	/* skip key loading if they are loaded due to sharing */
 	key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-				   JUMP_COND_SHRD | JUMP_COND_SELF);
+				   JUMP_COND_SHRD);
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
 				  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
@@ -688,8 +688,7 @@ void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
 
 	/* skip key loading if they are loaded due to sharing */
 	key_jump_cmd = append_jump(desc, JUMP_JSL |
-				   JUMP_TEST_ALL | JUMP_COND_SHRD |
-				   JUMP_COND_SELF);
+				   JUMP_TEST_ALL | JUMP_COND_SHRD);
 	if (cdata->key_inline)
 		append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
 				  cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);

+ 47 - 8
drivers/crypto/caam/caamalg_qi.c

@@ -12,7 +12,6 @@
 #include "intern.h"
 #include "desc_constr.h"
 #include "error.h"
-#include "sg_sw_sec4.h"
 #include "sg_sw_qm.h"
 #include "key_gen.h"
 #include "qi.h"
@@ -399,6 +398,7 @@ badkey:
  * @iv_dma: dma address of iv for checking continuity and link table
  * @qm_sg_bytes: length of dma mapped h/w link table
  * @qm_sg_dma: bus physical mapped address of h/w link table
+ * @assoclen: associated data length, in CAAM endianness
  * @assoclen_dma: bus physical mapped address of req->assoclen
  * @drv_req: driver-specific request structure
  * @sgt: the h/w link table
@@ -409,8 +409,12 @@ struct aead_edesc {
 	dma_addr_t iv_dma;
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
+	unsigned int assoclen;
 	dma_addr_t assoclen_dma;
 	struct caam_drv_req drv_req;
+#define CAAM_QI_MAX_AEAD_SG						\
+	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) /	\
+	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 };
 
@@ -431,6 +435,9 @@ struct ablkcipher_edesc {
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
 	struct caam_drv_req drv_req;
+#define CAAM_QI_MAX_ABLKCIPHER_SG					    \
+	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \
+	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 };
 
@@ -660,6 +667,14 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	 */
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, op_type, 0, 0);
+		qi_cache_free(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
 	sg_table = &edesc->sgt[0];
 	qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 
@@ -670,7 +685,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	edesc->drv_req.cbk = aead_done;
 	edesc->drv_req.drv_ctx = drv_ctx;
 
-	edesc->assoclen_dma = dma_map_single(qidev, &req->assoclen, 4,
+	edesc->assoclen = cpu_to_caam32(req->assoclen);
+	edesc->assoclen_dma = dma_map_single(qidev, &edesc->assoclen, 4,
 					     DMA_TO_DEVICE);
 	if (dma_mapping_error(qidev, edesc->assoclen_dma)) {
 		dev_err(qidev, "unable to map assoclen\n");
@@ -776,9 +792,9 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *caam_ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = caam_ctx->qidev;
-#ifdef DEBUG
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
+#ifdef DEBUG
 	dev_err(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
 #endif
 
@@ -791,14 +807,21 @@ static void ablkcipher_done(struct caam_drv_req *drv_req, u32 status)
 	print_hex_dump(KERN_ERR, "dstiv  @" __stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	dbg_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
-		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+	caam_dump_sg(KERN_ERR, "dst    @" __stringify(__LINE__)": ",
+		     DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 
 	ablkcipher_unmap(qidev, edesc, req);
 	qi_cache_free(edesc);
 
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block. This is used e.g. by the CTS mode.
+	 */
+	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
+				 ivsize, 0);
+
 	ablkcipher_request_complete(req, status);
 }
 
@@ -880,6 +903,15 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	}
 	dst_sg_idx = qm_sg_ents;
 
+	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, op_type, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* allocate space for base edesc and link tables */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (unlikely(!edesc)) {
@@ -892,7 +924,6 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
-	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	sg_table = &edesc->sgt[0];
 	edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
 	edesc->drv_req.app_ctx = req;
@@ -1026,6 +1057,14 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 		qm_sg_ents += 1 + mapped_dst_nents;
 	}
 
+	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
+		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
+			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* allocate space for base edesc and link tables */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (!edesc) {
@@ -1968,7 +2007,7 @@ static struct caam_aead_alg driver_aeads[] = {
 				.cra_name = "echainiv(authenc(hmac(sha256),"
 					    "cbc(des)))",
 				.cra_driver_name = "echainiv-authenc-"
-						   "hmac-sha256-cbc-desi-"
+						   "hmac-sha256-cbc-des-"
 						   "caam-qi",
 				.cra_blocksize = DES_BLOCK_SIZE,
 			},

+ 3 - 4
drivers/crypto/caam/caamhash.c

@@ -791,8 +791,8 @@ static int ahash_update_ctx(struct ahash_request *req)
 							 to_hash - *buflen,
 							 *next_buflen, 0);
 		} else {
-			(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
-				cpu_to_caam32(SEC4_SG_LEN_FIN);
+			sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index -
+					    1);
 		}
 
 		desc = edesc->hw_desc;
@@ -882,8 +882,7 @@ static int ahash_final_ctx(struct ahash_request *req)
 	if (ret)
 		goto unmap_ctx;
 
-	(edesc->sec4_sg + sec4_sg_src_index - 1)->len |=
-		cpu_to_caam32(SEC4_SG_LEN_FIN);
+	sg_to_sec4_set_last(edesc->sec4_sg + sec4_sg_src_index - 1);
 
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);

+ 1 - 5
drivers/crypto/caam/caamrng.c

@@ -285,11 +285,7 @@ static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
 	if (err)
 		return err;
 
-	err = caam_init_buf(ctx, 1);
-	if (err)
-		return err;
-
-	return 0;
+	return caam_init_buf(ctx, 1);
 }
 
 static struct hwrng caam_rng = {

+ 59 - 68
drivers/crypto/caam/ctrl.c

@@ -17,6 +17,8 @@
 
 bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
+bool caam_dpaa2;
+EXPORT_SYMBOL(caam_dpaa2);
 
 #ifdef CONFIG_CAAM_QI
 #include "qi.h"
@@ -319,8 +321,11 @@ static int caam_remove(struct platform_device *pdev)
 		caam_qi_shutdown(ctrlpriv->qidev);
 #endif
 
-	/* De-initialize RNG state handles initialized by this driver. */
-	if (ctrlpriv->rng4_sh_init)
+	/*
+	 * De-initialize RNG state handles initialized by this driver.
+	 * In case of DPAA 2.x, RNG is managed by MC firmware.
+	 */
+	if (!caam_dpaa2 && ctrlpriv->rng4_sh_init)
 		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
 
 	/* Shut down debug views */
@@ -444,7 +449,6 @@ static int caam_probe(struct platform_device *pdev)
 
 	dev = &pdev->dev;
 	dev_set_drvdata(dev, ctrlpriv);
-	ctrlpriv->pdev = pdev;
 	nprop = pdev->dev.of_node;
 
 	/* Enable clocking */
@@ -553,12 +557,17 @@ static int caam_probe(struct platform_device *pdev)
 
 	/*
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
-	 * long pointers in master configuration register
+	 * long pointers in master configuration register.
+	 * In case of DPAA 2.x, Management Complex firmware performs
+	 * the configuration.
 	 */
-	clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
-		      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
-		      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
-		      (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
+	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
+	if (!caam_dpaa2)
+		clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
+			      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
+			      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
+			      (sizeof(dma_addr_t) == sizeof(u64) ?
+			       MCFGR_LONG_PTR : 0));
 
 	/*
 	 *  Read the Compile Time paramters and SCFGR to determine
@@ -587,7 +596,9 @@ static int caam_probe(struct platform_device *pdev)
 			      JRSTART_JR3_START);
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
+		if (caam_dpaa2)
+			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(49));
+		else if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
 		else
 			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
@@ -630,11 +641,9 @@ static int caam_probe(struct platform_device *pdev)
 			ring++;
 		}
 
-	/* Check to see if QI present. If so, enable */
-	ctrlpriv->qi_present =
-			!!(rd_reg32(&ctrl->perfmon.comp_parms_ms) &
-			   CTPR_MS_QI_MASK);
-	if (ctrlpriv->qi_present) {
+	/* Check to see if (DPAA 1.x) QI present. If so, enable */
+	ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
+	if (ctrlpriv->qi_present && !caam_dpaa2) {
 		ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
 			       ((__force uint8_t *)ctrl +
 				 BLOCK_OFFSET * QI_BLOCK_NUMBER
@@ -662,8 +671,10 @@ static int caam_probe(struct platform_device *pdev)
 	/*
 	 * If SEC has RNG version >= 4 and RNG state handle has not been
 	 * already instantiated, do RNG instantiation
+	 * In case of DPAA 2.x, RNG is managed by MC firmware.
 	 */
-	if ((cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
+	if (!caam_dpaa2 &&
+	    (cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
 		ctrlpriv->rng4_sh_init =
 			rd_reg32(&ctrl->r4tst[0].rdsta);
 		/*
@@ -731,63 +742,43 @@ static int caam_probe(struct platform_device *pdev)
 	/* Report "alive" for developer to see */
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
 		 caam_get_era());
-	dev_info(dev, "job rings = %d, qi = %d\n",
-		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
+	dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
+		 ctrlpriv->total_jobrs, ctrlpriv->qi_present,
+		 caam_dpaa2 ? "yes" : "no");
 
 #ifdef CONFIG_DEBUG_FS
-
-	ctrlpriv->ctl_rq_dequeued =
-		debugfs_create_file("rq_dequeued",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->req_dequeued,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_enc_req =
-		debugfs_create_file("ob_rq_encrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_enc_req,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_dec_req =
-		debugfs_create_file("ib_rq_decrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_dec_req,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_enc_bytes =
-		debugfs_create_file("ob_bytes_encrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_enc_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ob_prot_bytes =
-		debugfs_create_file("ob_bytes_protected",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ob_prot_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_dec_bytes =
-		debugfs_create_file("ib_bytes_decrypted",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_dec_bytes,
-				    &caam_fops_u64_ro);
-	ctrlpriv->ctl_ib_valid_bytes =
-		debugfs_create_file("ib_bytes_validated",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->ib_valid_bytes,
-				    &caam_fops_u64_ro);
+	debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->req_dequeued,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_rq_encrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_enc_req,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_rq_decrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_dec_req,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_bytes_encrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_enc_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ob_bytes_protected", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ob_prot_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_bytes_decrypted", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_dec_bytes,
+			    &caam_fops_u64_ro);
+	debugfs_create_file("ib_bytes_validated", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->ib_valid_bytes,
+			    &caam_fops_u64_ro);
 
 	/* Controller level - global status values */
-	ctrlpriv->ctl_faultaddr =
-		debugfs_create_file("fault_addr",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->faultaddr,
-				    &caam_fops_u32_ro);
-	ctrlpriv->ctl_faultdetail =
-		debugfs_create_file("fault_detail",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->faultdetail,
-				    &caam_fops_u32_ro);
-	ctrlpriv->ctl_faultstatus =
-		debugfs_create_file("fault_status",
-				    S_IRUSR | S_IRGRP | S_IROTH,
-				    ctrlpriv->ctl, &perfmon->status,
-				    &caam_fops_u32_ro);
+	debugfs_create_file("fault_addr", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->faultaddr,
+			    &caam_fops_u32_ro);
+	debugfs_create_file("fault_detail", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->faultdetail,
+			    &caam_fops_u32_ro);
+	debugfs_create_file("fault_status", S_IRUSR | S_IRGRP | S_IROTH,
+			    ctrlpriv->ctl, &perfmon->status,
+			    &caam_fops_u32_ro);
 
 	/* Internal covering keys (useful in non-secure mode only) */
 	ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0];

+ 2 - 0
drivers/crypto/caam/ctrl.h

@@ -10,4 +10,6 @@
 /* Prototypes for backend-level services exposed to APIs */
 int caam_get_era(void);
 
+extern bool caam_dpaa2;
+
 #endif /* CTRL_H */

+ 40 - 0
drivers/crypto/caam/error.c

@@ -9,6 +9,46 @@
 #include "desc.h"
 #include "error.h"
 
+#ifdef DEBUG
+#include <linux/highmem.h>
+
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii)
+{
+	struct scatterlist *it;
+	void *it_page;
+	size_t len;
+	void *buf;
+
+	for (it = sg; it && tlen > 0; it = sg_next(it)) {
+		/*
+		 * make sure the scatterlist's page
+		 * has a valid virtual memory mapping
+		 */
+		it_page = kmap_atomic(sg_page(it));
+		if (unlikely(!it_page)) {
+			pr_err("caam_dump_sg: kmap failed\n");
+			return;
+		}
+
+		buf = it_page + it->offset;
+		len = min_t(size_t, tlen, it->length);
+		print_hex_dump(level, prefix_str, prefix_type, rowsize,
+			       groupsize, buf, len, ascii);
+		tlen -= len;
+
+		kunmap_atomic(it_page);
+	}
+}
+#else
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii)
+{}
+#endif /* DEBUG */
+EXPORT_SYMBOL(caam_dump_sg);
+
 static const struct {
 	u8 value;
 	const char *error_text;

+ 4 - 0
drivers/crypto/caam/error.h

@@ -8,4 +8,8 @@
 #define CAAM_ERROR_H
 #define CAAM_ERROR_STR_MAX 302
 void caam_jr_strstatus(struct device *jrdev, u32 status);
+
+void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type,
+		  int rowsize, int groupsize, struct scatterlist *sg,
+		  size_t tlen, bool ascii);
 #endif /* CAAM_ERROR_H */

+ 0 - 11
drivers/crypto/caam/intern.h

@@ -64,12 +64,9 @@ struct caam_drv_private_jr {
  * Driver-private storage for a single CAAM block instance
  */
 struct caam_drv_private {
-
-	struct device *dev;
 #ifdef CONFIG_CAAM_QI
 	struct device *qidev;
 #endif
-	struct platform_device *pdev;
 
 	/* Physical-presence section */
 	struct caam_ctrl __iomem *ctrl; /* controller region */
@@ -105,16 +102,8 @@ struct caam_drv_private {
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dfs_root;
 	struct dentry *ctl; /* controller dir */
-	struct dentry *ctl_rq_dequeued, *ctl_ob_enc_req, *ctl_ib_dec_req;
-	struct dentry *ctl_ob_enc_bytes, *ctl_ob_prot_bytes;
-	struct dentry *ctl_ib_dec_bytes, *ctl_ib_valid_bytes;
-	struct dentry *ctl_faultaddr, *ctl_faultdetail, *ctl_faultstatus;
-
 	struct debugfs_blob_wrapper ctl_kek_wrap, ctl_tkek_wrap, ctl_tdsk_wrap;
 	struct dentry *ctl_kek, *ctl_tkek, *ctl_tdsk;
-#ifdef CONFIG_CAAM_QI
-	struct dentry *qi_congested;
-#endif
 #endif
 };
 

+ 6 - 1
drivers/crypto/caam/jr.c

@@ -9,6 +9,7 @@
 #include <linux/of_address.h>
 
 #include "compat.h"
+#include "ctrl.h"
 #include "regs.h"
 #include "jr.h"
 #include "desc.h"
@@ -499,7 +500,11 @@ static int caam_jr_probe(struct platform_device *pdev)
 	jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl;
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring"))
+		if (caam_dpaa2)
+			error = dma_set_mask_and_coherent(jrdev,
+							  DMA_BIT_MASK(49));
+		else if (of_device_is_compatible(nprop,
+						 "fsl,sec-v5.0-job-ring"))
 			error = dma_set_mask_and_coherent(jrdev,
 							  DMA_BIT_MASK(40));
 		else
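
Sketched as a standalone helper, the mask selection reads as below. This is an interpretation of the hunk, not code from the patch; the pre-v5 fallback width is truncated above, so it is left symbolic here:

	static int example_jr_dma_mask(struct device *jrdev, bool dpaa2,
				       bool sec_v5)
	{
		u64 mask;

		if (dpaa2)		/* DPAA2 parts use 49-bit addressing */
			mask = DMA_BIT_MASK(49);
		else if (sec_v5)
			mask = DMA_BIT_MASK(40);
		else
			mask = DMA_BIT_MASK(LEGACY_WIDTH); /* width elided above */

		return dma_set_mask_and_coherent(jrdev, mask);
	}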

+ 12 - 18
drivers/crypto/caam/qi.c

@@ -24,9 +24,6 @@
  */
 #define MAX_RSP_FQ_BACKLOG_PER_CPU	256
 
-/* Length of a single buffer in the QI driver memory cache */
-#define CAAM_QI_MEMCACHE_SIZE	512
-
 #define CAAM_QI_ENQUEUE_RETRIES	10000
 
 #define CAAM_NAPI_WEIGHT	63
@@ -55,6 +52,7 @@ struct caam_qi_pcpu_priv {
 } ____cacheline_aligned;
 
 static DEFINE_PER_CPU(struct caam_qi_pcpu_priv, pcpu_qipriv);
+static DEFINE_PER_CPU(int, last_cpu);
 
 /*
  * caam_qi_priv - CAAM QI backend private params
@@ -203,8 +201,8 @@ static struct qman_fq *create_caam_req_fq(struct device *qidev,
 		goto init_req_fq_fail;
 	}
 
-	dev_info(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
-		 smp_processor_id());
+	dev_dbg(qidev, "Allocated request FQ %u for CPU %u\n", req_fq->fqid,
+		smp_processor_id());
 	return req_fq;
 
 init_req_fq_fail:
@@ -277,6 +275,7 @@ empty_fq:
 		dev_err(qidev, "OOS of FQID: %u failed\n", fq->fqid);
 
 	qman_destroy_fq(fq);
+	kfree(fq);
 
 	return ret;
 }
@@ -342,8 +341,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 
 		return ret;
 	}
@@ -373,10 +371,9 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc)
 		drv_ctx->req_fq = old_fq;
 
 		if (kill_fq(qidev, new_fq))
-			dev_warn(qidev, "New CAAM FQ: %u kill failed\n",
-				 new_fq->fqid);
+			dev_warn(qidev, "New CAAM FQ kill failed\n");
 	} else if (kill_fq(qidev, old_fq)) {
-		dev_warn(qidev, "Old CAAM FQ: %u kill failed\n", old_fq->fqid);
+		dev_warn(qidev, "Old CAAM FQ kill failed\n");
 	}
 
 	return 0;
@@ -392,7 +389,6 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev,
 	dma_addr_t hwdesc;
 	struct caam_drv_ctx *drv_ctx;
 	const cpumask_t *cpus = qman_affine_cpus();
-	static DEFINE_PER_CPU(int, last_cpu);
 
 	num_words = desc_len(sh_desc);
 	if (num_words > MAX_SDLEN) {
@@ -511,7 +507,6 @@ int caam_qi_shutdown(struct device *qidev)
 
 		if (kill_fq(qidev, per_cpu(pcpu_qipriv.rsp_fq, i)))
 			dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i);
-		kfree(per_cpu(pcpu_qipriv.rsp_fq, i));
 	}
 
 	/*
@@ -646,7 +641,7 @@ static int alloc_rsp_fq_cpu(struct device *qidev, unsigned int cpu)
 
 	per_cpu(pcpu_qipriv.rsp_fq, cpu) = fq;
 
-	dev_info(qidev, "Allocated response FQ %u for CPU %u", fq->fqid, cpu);
+	dev_dbg(qidev, "Allocated response FQ %u for CPU %u\n", fq->fqid, cpu);
 	return 0;
 }
 
@@ -679,7 +674,7 @@ static int init_cgr(struct device *qidev)
 		return ret;
 	}
 
-	dev_info(qidev, "Congestion threshold set to %llu\n", val);
+	dev_dbg(qidev, "Congestion threshold set to %llu\n", val);
 	return 0;
 }
 
@@ -737,6 +732,7 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	qi_pdev = platform_device_register_full(&qi_pdev_info);
 	if (IS_ERR(qi_pdev))
 		return PTR_ERR(qi_pdev);
+	set_dma_ops(&qi_pdev->dev, get_dma_ops(ctrldev));
 
 	ctrlpriv = dev_get_drvdata(ctrldev);
 	qidev = &qi_pdev->dev;
@@ -795,10 +791,8 @@ int caam_qi_init(struct platform_device *caam_pdev)
 	/* Done with the CGRs; restore the cpus allowed mask */
 	set_cpus_allowed_ptr(current, &old_cpumask);
 #ifdef CONFIG_DEBUG_FS
-	ctrlpriv->qi_congested = debugfs_create_file("qi_congested", 0444,
-						     ctrlpriv->ctl,
-						     &times_congested,
-						     &caam_fops_u64_ro);
+	debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl,
+			    &times_congested, &caam_fops_u64_ro);
 #endif
 	dev_info(qidev, "Linux CAAM Queue I/F driver initialised\n");
 	return 0;
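
With the kfree() folded into kill_fq() (and the matching call dropped from caam_qi_shutdown()), the frame queue now has a single point of release; callers must treat the pointer as dead once kill_fq() returns. A usage sketch:

	/* kill_fq() retires, destroys and frees the FQ on teardown */
	if (kill_fq(qidev, fq))
		dev_err(qidev, "FQ kill failed\n");
	fq = NULL;	/* do not kfree() or touch it again */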

+ 3 - 0
drivers/crypto/caam/qi.h

@@ -39,6 +39,9 @@
  */
 #define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
 
+/* Length of a single buffer in the QI driver memory cache */
+#define CAAM_QI_MEMCACHE_SIZE	768
+
 extern bool caam_congested __read_mostly;
 
 /*

+ 1 - 0
drivers/crypto/caam/regs.h

@@ -293,6 +293,7 @@ struct caam_perfmon {
 	u32 cha_rev_ls;		/* CRNR - CHA Rev No. Least significant half*/
 #define CTPR_MS_QI_SHIFT	25
 #define CTPR_MS_QI_MASK		(0x1ull << CTPR_MS_QI_SHIFT)
+#define CTPR_MS_DPAA2		BIT(13)
 #define CTPR_MS_VIRT_EN_INCL	0x00000001
 #define CTPR_MS_VIRT_EN_POR	0x00000002
 #define CTPR_MS_PG_SZ_MASK	0x10
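
A plausible probe-time consumer of the new bit; a sketch, since the reading side is not shown in this hunk (comp_params stands for the CTPR_MS register value):

	/* Latch whether this controller fronts a DPAA2 fabric */
	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);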

+ 81 - 0
drivers/crypto/caam/sg_sw_qm2.h

@@ -0,0 +1,81 @@
+/*
+ * Copyright 2015-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the names of the above-listed copyright holders nor the
+ *	 names of any contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SG_SW_QM2_H_
+#define _SG_SW_QM2_H_
+
+#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
+
+static inline void dma_to_qm_sg_one(struct dpaa2_sg_entry *qm_sg_ptr,
+				    dma_addr_t dma, u32 len, u16 offset)
+{
+	dpaa2_sg_set_addr(qm_sg_ptr, dma);
+	dpaa2_sg_set_format(qm_sg_ptr, dpaa2_sg_single);
+	dpaa2_sg_set_final(qm_sg_ptr, false);
+	dpaa2_sg_set_len(qm_sg_ptr, len);
+	dpaa2_sg_set_bpid(qm_sg_ptr, 0);
+	dpaa2_sg_set_offset(qm_sg_ptr, offset);
+}
+
+/*
+ * Convert a scatterlist to h/w link table format, without setting
+ * the final bit; instead, return a pointer to the last entry.
+ */
+static inline struct dpaa2_sg_entry *
+sg_to_qm_sg(struct scatterlist *sg, int sg_count,
+	    struct dpaa2_sg_entry *qm_sg_ptr, u16 offset)
+{
+	while (sg_count && sg) {
+		dma_to_qm_sg_one(qm_sg_ptr, sg_dma_address(sg),
+				 sg_dma_len(sg), offset);
+		qm_sg_ptr++;
+		sg = sg_next(sg);
+		sg_count--;
+	}
+	return qm_sg_ptr - 1;
+}
+
+/*
+ * convert scatterlist to h/w link table format
+ * scatterlist must have been previously dma mapped
+ */
+static inline void sg_to_qm_sg_last(struct scatterlist *sg, int sg_count,
+				    struct dpaa2_sg_entry *qm_sg_ptr,
+				    u16 offset)
+{
+	qm_sg_ptr = sg_to_qm_sg(sg, sg_count, qm_sg_ptr, offset);
+	dpaa2_sg_set_final(qm_sg_ptr, true);
+}
+
+#endif /* _SG_SW_QM2_H_ */
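
Hypothetical use of the new helpers: DMA-map the scatterlist first, then mirror it into a hardware link table whose last entry carries the final bit (dev, req and qm_sg_table are illustrative names):

	int nents = dma_map_sg(dev, req->src, sg_nents(req->src),
			       DMA_TO_DEVICE);
	if (nents)
		sg_to_qm_sg_last(req->src, nents, qm_sg_table, 0);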

+ 25 - 18
drivers/crypto/caam/sg_sw_sec4.h

@@ -5,7 +5,13 @@
  *
  */
 
+#ifndef _SG_SW_SEC4_H_
+#define _SG_SW_SEC4_H_
+
+#include "ctrl.h"
 #include "regs.h"
+#include "sg_sw_qm2.h"
+#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
 
 struct sec4_sg_entry {
 	u64 ptr;
@@ -19,9 +25,15 @@ struct sec4_sg_entry {
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
 				      dma_addr_t dma, u32 len, u16 offset)
 {
-	sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
-	sec4_sg_ptr->len = cpu_to_caam32(len);
-	sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
+	if (caam_dpaa2) {
+		dma_to_qm_sg_one((struct dpaa2_sg_entry *)sec4_sg_ptr, dma, len,
+				 offset);
+	} else {
+		sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
+		sec4_sg_ptr->len = cpu_to_caam32(len);
+		sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset &
+							 SEC4_SG_OFFSET_MASK);
+	}
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr,
@@ -47,6 +59,14 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count,
 	return sec4_sg_ptr - 1;
 }
 
+static inline void sg_to_sec4_set_last(struct sec4_sg_entry *sec4_sg_ptr)
+{
+	if (caam_dpaa2)
+		dpaa2_sg_set_final((struct dpaa2_sg_entry *)sec4_sg_ptr, true);
+	else
+		sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+}
+
 /*
  * convert scatterlist to h/w link table format
  * scatterlist must have been previously dma mapped
@@ -56,20 +76,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count,
 				      u16 offset)
 {
 	sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset);
-	sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+	sg_to_sec4_set_last(sec4_sg_ptr);
 }
 
-static inline struct sec4_sg_entry *sg_to_sec4_sg_len(
-	struct scatterlist *sg, unsigned int total,
-	struct sec4_sg_entry *sec4_sg_ptr)
-{
-	do {
-		unsigned int len = min(sg_dma_len(sg), total);
-
-		dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), len, 0);
-		sec4_sg_ptr++;
-		sg = sg_next(sg);
-		total -= len;
-	} while (total);
-	return sec4_sg_ptr - 1;
-}
+#endif /* _SG_SW_SEC4_H_ */
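
The split lets one call site serve both platforms: build the table, then let sg_to_sec4_set_last() pick the final-bit encoding at run time. A usage sketch with illustrative names:

	sec4_sg_ptr = sg_to_sec4_sg(req->src, mapped_nents, sec4_sg_ptr, 0);
	sg_to_sec4_set_last(sec4_sg_ptr);	/* DPAA2-aware final bit */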

+ 9 - 4
drivers/crypto/cavium/cpt/cptpf_main.c

@@ -268,8 +268,10 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 	mcode = &cpt->mcode[cpt->next_mc_idx];
 	memcpy(mcode->version, (u8 *)fw_entry->data, CPT_UCODE_VERSION_SZ);
 	mcode->code_size = ntohl(ucode->code_length) * 2;
-	if (!mcode->code_size)
-		return -EINVAL;
+	if (!mcode->code_size) {
+		ret = -EINVAL;
+		goto fw_release;
+	}
 
 	mcode->is_ae = is_ae;
 	mcode->core_mask = 0ULL;
@@ -280,7 +282,8 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 					  &mcode->phys_base, GFP_KERNEL);
 	if (!mcode->code) {
 		dev_err(dev, "Unable to allocate space for microcode");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto fw_release;
 	}
 
 	memcpy((void *)mcode->code, (void *)(fw_entry->data + sizeof(*ucode)),
@@ -302,12 +305,14 @@ static int cpt_ucode_load_fw(struct cpt_device *cpt, const u8 *fw, bool is_ae)
 	ret = do_cpt_init(cpt, mcode);
 	if (ret) {
 		dev_err(dev, "do_cpt_init failed with ret: %d\n", ret);
-		return ret;
+		goto fw_release;
 	}
 
 	dev_info(dev, "Microcode Loaded %s\n", mcode->version);
 	mcode->is_mc_valid = 1;
 	cpt->next_mc_idx++;
+
+fw_release:
 	release_firmware(fw_entry);
 
 	return ret;
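
The fix is an instance of the standard single-exit idiom: once request_firmware() succeeds, every path, error or not, funnels through one release label. Reduced to its skeleton (validate() and its error codes are hypothetical):

	ret = request_firmware(&fw_entry, fw_name, dev);
	if (ret)
		return ret;

	ret = validate(fw_entry->data, fw_entry->size);	/* hypothetical */
	if (ret)
		goto fw_release;
	/* ... more work, each failure jumping to fw_release ... */

fw_release:
	release_firmware(fw_entry);
	return ret;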

+ 3 - 1
drivers/crypto/cavium/nitrox/nitrox_main.c

@@ -513,8 +513,10 @@ static int nitrox_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 
 	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
-	if (!ndev)
+	if (!ndev) {
+		err = -ENOMEM;
 		goto ndev_fail;
+	}
 
 	pci_set_drvdata(pdev, ndev);
 	ndev->pdev = pdev;

+ 15 - 7
drivers/crypto/ccp/Kconfig

@@ -1,25 +1,33 @@
 config CRYPTO_DEV_CCP_DD
-	tristate "Cryptographic Coprocessor device driver"
-	depends on CRYPTO_DEV_CCP
+	tristate "Secure Processor device driver"
 	default m
+	help
+	  Provides the AMD Secure Processor device driver.
+	  If you choose 'M' here, this module will be called ccp.
+
+config CRYPTO_DEV_SP_CCP
+	bool "Cryptographic Coprocessor device"
+	default y
+	depends on CRYPTO_DEV_CCP_DD
 	select HW_RANDOM
 	select DMA_ENGINE
 	select DMADEVICES
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	help
-	  Provides the interface to use the AMD Cryptographic Coprocessor
-	  which can be used to offload encryption operations such as SHA,
-	  AES and more. If you choose 'M' here, this module will be called
-	  ccp.
+	  Provides support for the AMD Cryptographic Coprocessor (CCP)
+	  device, which can be used to offload encryption operations such
+	  as SHA, AES and more.
 
 config CRYPTO_DEV_CCP_CRYPTO
 	tristate "Encryption and hashing offload support"
-	depends on CRYPTO_DEV_CCP_DD
 	default m
+	depends on CRYPTO_DEV_CCP_DD
+	depends on CRYPTO_DEV_SP_CCP
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AUTHENC
+	select CRYPTO_RSA
 	help
 	  Support for using the cryptographic API with the AMD Cryptographic
 	  Coprocessor. This module supports offload of SHA and AES algorithms.

+ 4 - 3
drivers/crypto/ccp/Makefile

@@ -1,12 +1,12 @@
 obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
-ccp-objs := ccp-dev.o \
+ccp-objs  := sp-dev.o sp-platform.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
 	    ccp-ops.o \
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
-	    ccp-platform.o \
 	    ccp-dmaengine.o \
 	    ccp-debugfs.o
-ccp-$(CONFIG_PCI) += ccp-pci.o
+ccp-$(CONFIG_PCI) += sp-pci.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
 ccp-crypto-objs := ccp-crypto-main.o \
@@ -15,4 +15,5 @@ ccp-crypto-objs := ccp-crypto-main.o \
 		   ccp-crypto-aes-xts.o \
 		   ccp-crypto-aes-galois.o \
 		   ccp-crypto-des3.o \
+		   ccp-crypto-rsa.o \
 		   ccp-crypto-sha.o

+ 1 - 1
drivers/crypto/ccp/ccp-crypto-aes-galois.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES GCM crypto API support
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *

+ 53 - 43
drivers/crypto/ccp/ccp-crypto-aes-xts.c

@@ -1,8 +1,9 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
+ * Author: Gary R Hook <gary.hook@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -15,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <crypto/aes.h>
+#include <crypto/xts.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 
@@ -37,46 +39,26 @@ struct ccp_unit_size_map {
 	u32 value;
 };
 
-static struct ccp_unit_size_map unit_size_map[] = {
+static struct ccp_unit_size_map xts_unit_sizes[] = {
 	{
-		.size	= 4096,
-		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
-	},
-	{
-		.size	= 2048,
-		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
-	},
-	{
-		.size	= 1024,
-		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
+		.size   = 16,
+		.value	= CCP_XTS_AES_UNIT_SIZE_16,
 	},
 	{
-		.size	= 512,
+		.size   = 512,
 		.value	= CCP_XTS_AES_UNIT_SIZE_512,
 	},
 	{
-		.size	= 256,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 128,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 64,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
-	},
-	{
-		.size	= 32,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+		.size   = 1024,
+		.value	= CCP_XTS_AES_UNIT_SIZE_1024,
 	},
 	{
-		.size	= 16,
-		.value	= CCP_XTS_AES_UNIT_SIZE_16,
+		.size   = 2048,
+		.value	= CCP_XTS_AES_UNIT_SIZE_2048,
 	},
 	{
-		.size	= 1,
-		.value	= CCP_XTS_AES_UNIT_SIZE__LAST,
+		.size   = 4096,
+		.value	= CCP_XTS_AES_UNIT_SIZE_4096,
 	},
 };
 
@@ -96,15 +78,26 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
 static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 			      unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+	struct crypto_tfm *xfm = crypto_ablkcipher_tfm(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(xfm);
+	unsigned int ccpversion = ccp_version();
+	int ret;
 
-	/* Only support 128-bit AES key with a 128-bit Tweak key,
-	 * otherwise use the fallback
+	ret = xts_check_key(xfm, key, key_len);
+	if (ret)
+		return ret;
+
+	/* Version 3 devices support 128-bit keys; version 5 devices can
+	 * accommodate 128- and 256-bit keys.
 	 */
 	switch (key_len) {
 	case AES_KEYSIZE_128 * 2:
 		memcpy(ctx->u.aes.key, key, key_len);
 		break;
+	case AES_KEYSIZE_256 * 2:
+		if (ccpversion > CCP_VERSION(3, 0))
+			memcpy(ctx->u.aes.key, key, key_len);
+		break;
 	}
 	ctx->u.aes.key_len = key_len / 2;
 	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
@@ -117,6 +110,8 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
 	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	unsigned int ccpversion = ccp_version();
+	unsigned int fallback = 0;
 	unsigned int unit;
 	u32 unit_size;
 	int ret;
@@ -130,18 +125,32 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	if (!req->info)
 		return -EINVAL;
 
+	/* Check conditions under which the CCP can fulfill a request. The
+	 * device can handle input plaintext of a length that is a multiple
+	 * of the unit_size, bug the crypto implementation only supports
+	 * the unit_size being equal to the input length. This limits the
+	 * number of scenarios we can handle.
+	 */
 	unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
-	if (req->nbytes <= unit_size_map[0].size) {
-		for (unit = 0; unit < ARRAY_SIZE(unit_size_map); unit++) {
-			if (!(req->nbytes & (unit_size_map[unit].size - 1))) {
-				unit_size = unit_size_map[unit].value;
-				break;
-			}
+	for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) {
+		if (req->nbytes == xts_unit_sizes[unit].size) {
+			unit_size = unit;
+			break;
 		}
 	}
-
-	if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) ||
-	    (ctx->u.aes.key_len != AES_KEYSIZE_128)) {
+	/* The CCP has restrictions on block sizes. Also, a version 3 device
+	 * only supports AES-128 operations; version 5 CCPs support both
+	 * AES-128 and -256 operations.
+	 */
+	if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST)
+		fallback = 1;
+	if ((ccpversion < CCP_VERSION(5, 0)) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_128))
+		fallback = 1;
+	if ((ctx->u.aes.key_len != AES_KEYSIZE_128) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_256))
+		fallback = 1;
+	if (fallback) {
 		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);
 
 		/* Use the fallback to process the request for any
@@ -164,6 +173,7 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
 	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
 	INIT_LIST_HEAD(&rctx->cmd.entry);
 	rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+	rctx->cmd.u.xts.type = CCP_AES_TYPE_128;
 	rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
 					   : CCP_AES_ACTION_DECRYPT;
 	rctx->cmd.u.xts.unit_size = unit_size;
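
The combined effect of the new checks, condensed into one predicate. This is a sketch, not code from the patch; key_len here is the per-key half, as stored in ctx->u.aes.key_len, and the real code routes rejected requests through an skcipher fallback:

	static bool example_ccp_can_handle(unsigned int nbytes,
					   unsigned int key_len,
					   unsigned int version)
	{
		bool size_ok = nbytes == 16 || nbytes == 512 ||
			       nbytes == 1024 || nbytes == 2048 ||
			       nbytes == 4096;
		bool key_ok = key_len == AES_KEYSIZE_128 ||
			      (key_len == AES_KEYSIZE_256 &&
			       version >= CCP_VERSION(5, 0));

		return size_ok && key_ok;
	}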

+ 1 - 1
drivers/crypto/ccp/ccp-crypto-des3.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <ghook@amd.com>
  *

+ 20 - 1
drivers/crypto/ccp/ccp-crypto-main.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
@@ -17,6 +17,7 @@
 #include <linux/ccp.h>
 #include <linux/scatterlist.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/akcipher.h>
 
 #include "ccp-crypto.h"
 
@@ -37,10 +38,15 @@ static unsigned int des3_disable;
 module_param(des3_disable, uint, 0444);
 MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value");
 
+static unsigned int rsa_disable;
+module_param(rsa_disable, uint, 0444);
+MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value");
+
 /* List heads for the supported algorithms */
 static LIST_HEAD(hash_algs);
 static LIST_HEAD(cipher_algs);
 static LIST_HEAD(aead_algs);
+static LIST_HEAD(akcipher_algs);
 
 /* For any tfm, requests for that tfm must be returned on the order
  * received.  With multiple queues available, the CCP can process more
@@ -358,6 +364,12 @@ static int ccp_register_algs(void)
 			return ret;
 	}
 
+	if (!rsa_disable) {
+		ret = ccp_register_rsa_algs(&akcipher_algs);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -366,6 +378,7 @@ static void ccp_unregister_algs(void)
 	struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
 	struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
 	struct ccp_crypto_aead *aead_alg, *aead_tmp;
+	struct ccp_crypto_akcipher_alg *akc_alg, *akc_tmp;
 
 	list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) {
 		crypto_unregister_ahash(&ahash_alg->alg);
@@ -384,6 +397,12 @@ static void ccp_unregister_algs(void)
 		list_del(&aead_alg->entry);
 		kfree(aead_alg);
 	}
+
+	list_for_each_entry_safe(akc_alg, akc_tmp, &akcipher_algs, entry) {
+		crypto_unregister_akcipher(&akc_alg->alg);
+		list_del(&akc_alg->entry);
+		kfree(akc_alg);
+	}
 }
 
 static int ccp_crypto_init(void)

+ 299 - 0
drivers/crypto/ccp/ccp-crypto-rsa.c

@@ -0,0 +1,299 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) RSA crypto API support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static inline struct akcipher_request *akcipher_request_cast(
+	struct crypto_async_request *req)
+{
+	return container_of(req, struct akcipher_request, base);
+}
+
+static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen,
+					    const u8 *buf, size_t sz)
+{
+	int nskip;
+
+	for (nskip = 0; nskip < sz; nskip++)
+		if (buf[nskip])
+			break;
+	*kplen = sz - nskip;
+	*kpbuf = kzalloc(*kplen, GFP_KERNEL);
+	if (!*kpbuf)
+		return -ENOMEM;
+	memcpy(*kpbuf, buf + nskip, *kplen);
+
+	return 0;
+}
+
+static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct akcipher_request *req = akcipher_request_cast(async_req);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+
+	if (ret)
+		return ret;
+
+	req->dst_len = rctx->cmd.u.rsa.key_size >> 3;
+
+	return 0;
+}
+
+static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
+{
+	if (ccp_version() > CCP_VERSION(3, 0))
+		return CCP5_RSA_MAXMOD;
+	else
+		return CCP_RSA_MAXMOD;
+}
+
+static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+	int ret = 0;
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_RSA;
+
+	rctx->cmd.u.rsa.key_size = ctx->u.rsa.key_len; /* in bits */
+	if (encrypt) {
+		rctx->cmd.u.rsa.exp = &ctx->u.rsa.e_sg;
+		rctx->cmd.u.rsa.exp_len = ctx->u.rsa.e_len;
+	} else {
+		rctx->cmd.u.rsa.exp = &ctx->u.rsa.d_sg;
+		rctx->cmd.u.rsa.exp_len = ctx->u.rsa.d_len;
+	}
+	rctx->cmd.u.rsa.mod = &ctx->u.rsa.n_sg;
+	rctx->cmd.u.rsa.mod_len = ctx->u.rsa.n_len;
+	rctx->cmd.u.rsa.src = req->src;
+	rctx->cmd.u.rsa.src_len = req->src_len;
+	rctx->cmd.u.rsa.dst = req->dst;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+static int ccp_rsa_encrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, true);
+}
+
+static int ccp_rsa_decrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, false);
+}
+
+static int ccp_check_key_length(unsigned int len)
+{
+	/* In bits */
+	if (len < 8 || len > 4096)
+		return -EINVAL;
+	return 0;
+}
+
+static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx)
+{
+	/* Clean up old key data */
+	kzfree(ctx->u.rsa.e_buf);
+	ctx->u.rsa.e_buf = NULL;
+	ctx->u.rsa.e_len = 0;
+	kzfree(ctx->u.rsa.n_buf);
+	ctx->u.rsa.n_buf = NULL;
+	ctx->u.rsa.n_len = 0;
+	kzfree(ctx->u.rsa.d_buf);
+	ctx->u.rsa.d_buf = NULL;
+	ctx->u.rsa.d_len = 0;
+}
+
+static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
+			  unsigned int keylen, bool private)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct rsa_key raw_key;
+	int ret;
+
+	ccp_rsa_free_key_bufs(ctx);
+	memset(&raw_key, 0, sizeof(raw_key));
+
+	/* Code borrowed from crypto/rsa.c */
+	if (private)
+		ret = rsa_parse_priv_key(&raw_key, key, keylen);
+	else
+		ret = rsa_parse_pub_key(&raw_key, key, keylen);
+	if (ret)
+		goto n_key;
+
+	ret = ccp_copy_and_save_keypart(&ctx->u.rsa.n_buf, &ctx->u.rsa.n_len,
+					raw_key.n, raw_key.n_sz);
+	if (ret)
+		goto key_err;
+	sg_init_one(&ctx->u.rsa.n_sg, ctx->u.rsa.n_buf, ctx->u.rsa.n_len);
+
+	ctx->u.rsa.key_len = ctx->u.rsa.n_len << 3; /* convert to bits */
+	if (ccp_check_key_length(ctx->u.rsa.key_len)) {
+		ret = -EINVAL;
+		goto key_err;
+	}
+
+	ret = ccp_copy_and_save_keypart(&ctx->u.rsa.e_buf, &ctx->u.rsa.e_len,
+					raw_key.e, raw_key.e_sz);
+	if (ret)
+		goto key_err;
+	sg_init_one(&ctx->u.rsa.e_sg, ctx->u.rsa.e_buf, ctx->u.rsa.e_len);
+
+	if (private) {
+		ret = ccp_copy_and_save_keypart(&ctx->u.rsa.d_buf,
+						&ctx->u.rsa.d_len,
+						raw_key.d, raw_key.d_sz);
+		if (ret)
+			goto key_err;
+		sg_init_one(&ctx->u.rsa.d_sg,
+			    ctx->u.rsa.d_buf, ctx->u.rsa.d_len);
+	}
+
+	return 0;
+
+key_err:
+	ccp_rsa_free_key_bufs(ctx);
+
+n_key:
+	return ret;
+}
+
+static int ccp_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
+			      unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, true);
+}
+
+static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
+			     unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, false);
+}
+
+static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+
+	akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx));
+	ctx->complete = ccp_rsa_complete;
+
+	return 0;
+}
+
+static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+	ccp_rsa_free_key_bufs(ctx);
+}
+
+static struct akcipher_alg ccp_rsa_defaults = {
+	.encrypt = ccp_rsa_encrypt,
+	.decrypt = ccp_rsa_decrypt,
+	.sign = ccp_rsa_decrypt,
+	.verify = ccp_rsa_encrypt,
+	.set_pub_key = ccp_rsa_setpubkey,
+	.set_priv_key = ccp_rsa_setprivkey,
+	.max_size = ccp_rsa_maxsize,
+	.init = ccp_rsa_init_tfm,
+	.exit = ccp_rsa_exit_tfm,
+	.base = {
+		.cra_name = "rsa",
+		.cra_driver_name = "rsa-ccp",
+		.cra_priority = CCP_CRA_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = 2 * sizeof(struct ccp_ctx),
+	},
+};
+
+struct ccp_rsa_def {
+	unsigned int version;
+	const char *name;
+	const char *driver_name;
+	unsigned int reqsize;
+	struct akcipher_alg *alg_defaults;
+};
+
+static struct ccp_rsa_def rsa_algs[] = {
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "rsa",
+		.driver_name	= "rsa-ccp",
+		.reqsize	= sizeof(struct ccp_rsa_req_ctx),
+		.alg_defaults	= &ccp_rsa_defaults,
+	}
+};
+
+int ccp_register_rsa_alg(struct list_head *head, const struct ccp_rsa_def *def)
+{
+	struct ccp_crypto_akcipher_alg *ccp_alg;
+	struct akcipher_alg *alg;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	alg = &ccp_alg->alg;
+	*alg = *def->alg_defaults;
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+	ret = crypto_register_akcipher(alg);
+	if (ret) {
+		pr_err("%s akcipher algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+
+	return 0;
+}
+
+int ccp_register_rsa_algs(struct list_head *head)
+{
+	int i, ret;
+	unsigned int ccpversion = ccp_version();
+
+	/* Register the RSA algorithm in standard mode.
+	 * This works for CCP v3 and later.
+	 */
+	for (i = 0; i < ARRAY_SIZE(rsa_algs); i++) {
+		if (rsa_algs[i].version > ccpversion)
+			continue;
+		ret = ccp_register_rsa_alg(head, &rsa_algs[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
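
ccp_copy_and_save_keypart() trims leading zero octets before caching a key part, so the derived key size reflects the integer's true width. For example (illustrative values):

	/* A 6-byte integer with two leading zero octets... */
	static const u8 raw[6] = { 0x00, 0x00, 0x01, 0x00, 0x00, 0x01 };
	u8 *buf;
	unsigned int len;

	/* ...caches as 4 bytes, so len == 4 and key_len becomes 32 bits */
	if (!ccp_copy_and_save_keypart(&buf, &len, raw, sizeof(raw)))
		kzfree(buf);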

+ 1 - 1
drivers/crypto/ccp/ccp-crypto-sha.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) SHA crypto API support
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>

+ 34 - 2
drivers/crypto/ccp/ccp-crypto.h

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) crypto API support
  *
- * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  *
@@ -24,6 +24,8 @@
 #include <crypto/ctr.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
+#include <crypto/akcipher.h>
+#include <crypto/internal/rsa.h>
 
 #define	CCP_LOG_LEVEL	KERN_INFO
 
@@ -58,6 +60,12 @@ struct ccp_crypto_ahash_alg {
 	struct ahash_alg alg;
 };
 
+struct ccp_crypto_akcipher_alg {
+	struct list_head entry;
+
+	struct akcipher_alg alg;
+};
+
 static inline struct ccp_crypto_ablkcipher_alg *
 	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
 {
@@ -91,7 +99,7 @@ struct ccp_aes_ctx {
 
 	struct scatterlist key_sg;
 	unsigned int key_len;
-	u8 key[AES_MAX_KEY_SIZE];
+	u8 key[AES_MAX_KEY_SIZE * 2];
 
 	u8 nonce[CTR_RFC3686_NONCE_SIZE];
 
@@ -227,12 +235,35 @@ struct ccp_sha_exp_ctx {
 	u8 buf[MAX_SHA_BLOCK_SIZE];
 };
 
+/***** RSA related defines *****/
+
+struct ccp_rsa_ctx {
+	unsigned int key_len; /* in bits */
+	struct scatterlist e_sg;
+	u8 *e_buf;
+	unsigned int e_len;
+	struct scatterlist n_sg;
+	u8 *n_buf;
+	unsigned int n_len;
+	struct scatterlist d_sg;
+	u8 *d_buf;
+	unsigned int d_len;
+};
+
+struct ccp_rsa_req_ctx {
+	struct ccp_cmd cmd;
+};
+
+#define	CCP_RSA_MAXMOD	(4 * 1024 / 8)
+#define	CCP5_RSA_MAXMOD	(16 * 1024 / 8)
+
 /***** Common Context Structure *****/
 struct ccp_ctx {
 	int (*complete)(struct crypto_async_request *req, int ret);
 
 	union {
 		struct ccp_aes_ctx aes;
+		struct ccp_rsa_ctx rsa;
 		struct ccp_sha_ctx sha;
 		struct ccp_des3_ctx des3;
 	} u;
@@ -249,5 +280,6 @@ int ccp_register_aes_xts_algs(struct list_head *head);
 int ccp_register_aes_aeads(struct list_head *head);
 int ccp_register_sha_algs(struct list_head *head);
 int ccp_register_des3_algs(struct list_head *head);
+int ccp_register_rsa_algs(struct list_head *head);
 
 #endif

+ 10 - 5
drivers/crypto/ccp/ccp-debugfs.c

@@ -305,19 +305,19 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 
 	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
 	if (!ccp->debugfs_instance)
-		return;
+		goto err;
 
 	debugfs_info = debugfs_create_file("info", 0400,
 					   ccp->debugfs_instance, ccp,
 					   &ccp_debugfs_info_ops);
 	if (!debugfs_info)
-		return;
+		goto err;
 
 	debugfs_stats = debugfs_create_file("stats", 0600,
 					    ccp->debugfs_instance, ccp,
 					    &ccp_debugfs_stats_ops);
 	if (!debugfs_stats)
-		return;
+		goto err;
 
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		cmd_q = &ccp->cmd_q[i];
@@ -327,15 +327,20 @@ void ccp5_debugfs_setup(struct ccp_device *ccp)
 		debugfs_q_instance =
 			debugfs_create_dir(name, ccp->debugfs_instance);
 		if (!debugfs_q_instance)
-			return;
+			goto err;
 
 		debugfs_q_stats =
 			debugfs_create_file("stats", 0600,
 					    debugfs_q_instance, cmd_q,
 					    &ccp_debugfs_queue_ops);
 		if (!debugfs_q_stats)
-			return;
+			goto err;
 	}
+
+	return;
+
+err:
+	debugfs_remove_recursive(ccp->debugfs_instance);
 }
 
 void ccp5_debugfs_destroy(void)
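
The reworked error handling relies on debugfs_remove_recursive() accepting a partially built (or NULL) directory, so a single label can unwind any failure point. The idiom in miniature (dir, priv and fops are illustrative):

	dir = debugfs_create_dir("example", NULL);
	if (!dir)
		goto err;
	if (!debugfs_create_file("stats", 0600, dir, priv, &fops))
		goto err;
	return;
err:
	debugfs_remove_recursive(dir);	/* tears down dir and children */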

+ 13 - 7
drivers/crypto/ccp/ccp-dev-v3.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -359,8 +359,7 @@ static void ccp_irq_bh(unsigned long data)
 
 static irqreturn_t ccp_irq_handler(int irq, void *data)
 {
-	struct device *dev = data;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_device *ccp = (struct ccp_device *)data;
 
 	ccp_disable_queue_interrupts(ccp);
 	if (ccp->use_tasklet)
@@ -454,7 +453,7 @@ static int ccp_init(struct ccp_device *ccp)
 	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
 
 	/* Request an irq */
-	ret = ccp->get_irq(ccp);
+	ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp);
 	if (ret) {
 		dev_err(dev, "unable to allocate an IRQ\n");
 		goto e_pool;
@@ -511,7 +510,7 @@ e_kthread:
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 e_pool:
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -550,7 +549,7 @@ static void ccp_destroy(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 	for (i = 0; i < ccp->cmd_q_count; i++)
 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
@@ -586,10 +585,17 @@ static const struct ccp_actions ccp3_actions = {
 	.irqhandler = ccp_irq_handler,
 };
 
+const struct ccp_vdata ccpv3_platform = {
+	.version = CCP_VERSION(3, 0),
+	.setup = NULL,
+	.perform = &ccp3_actions,
+	.offset = 0,
+};
+
 const struct ccp_vdata ccpv3 = {
 	.version = CCP_VERSION(3, 0),
 	.setup = NULL,
 	.perform = &ccp3_actions,
-	.bar = 2,
 	.offset = 0x20000,
+	.rsamax = CCP_RSA_MAX_WIDTH,
 };

+ 14 - 14
drivers/crypto/ccp/ccp-dev-v5.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *
@@ -145,6 +145,7 @@ union ccp_function {
 #define	CCP_AES_MODE(p)		((p)->aes.mode)
 #define	CCP_AES_TYPE(p)		((p)->aes.type)
 #define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
+#define	CCP_XTS_TYPE(p)		((p)->aes_xts.type)
 #define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
 #define	CCP_DES3_SIZE(p)	((p)->des3.size)
 #define	CCP_DES3_ENCRYPT(p)	((p)->des3.encrypt)
@@ -344,6 +345,7 @@ static int ccp5_perform_xts_aes(struct ccp_op *op)
 	CCP5_CMD_PROT(&desc) = 0;
 
 	function.raw = 0;
+	CCP_XTS_TYPE(&function) = op->u.xts.type;
 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
@@ -469,7 +471,7 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	CCP5_CMD_PROT(&desc) = 0;
 
 	function.raw = 0;
-	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
+	CCP_RSA_SIZE(&function) = (op->u.rsa.mod_size + 7) >> 3;
 	CCP5_CMD_FUNCTION(&desc) = function.raw;
 
 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
@@ -484,10 +486,10 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
-	/* Exponent is in LSB memory */
-	CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
-	CCP5_CMD_KEY_HI(&desc) = 0;
-	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	/* Key (Exponent) is in external memory */
+	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
+	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
 	return ccp5_do_cmd(&desc, op->cmd_q);
 }
@@ -769,8 +771,7 @@ static void ccp5_irq_bh(unsigned long data)
 
 static irqreturn_t ccp5_irq_handler(int irq, void *data)
 {
-	struct device *dev = data;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
+	struct ccp_device *ccp = (struct ccp_device *)data;
 
 	ccp5_disable_queue_interrupts(ccp);
 	ccp->total_interrupts++;
@@ -881,7 +882,7 @@ static int ccp5_init(struct ccp_device *ccp)
 
 	dev_dbg(dev, "Requesting an IRQ...\n");
 	/* Request an irq */
-	ret = ccp->get_irq(ccp);
+	ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp);
 	if (ret) {
 		dev_err(dev, "unable to allocate an IRQ\n");
 		goto e_pool;
@@ -987,7 +988,7 @@ e_kthread:
 			kthread_stop(ccp->cmd_q[i].kthread);
 
 e_irq:
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 e_pool:
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -1037,7 +1038,7 @@ static void ccp5_destroy(struct ccp_device *ccp)
 		if (ccp->cmd_q[i].kthread)
 			kthread_stop(ccp->cmd_q[i].kthread);
 
-	ccp->free_irq(ccp);
+	sp_free_ccp_irq(ccp->sp, ccp);
 
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		cmd_q = &ccp->cmd_q[i];
@@ -1106,15 +1107,14 @@ static const struct ccp_actions ccp5_actions = {
 	.init = ccp5_init,
 	.destroy = ccp5_destroy,
 	.get_free_slots = ccp5_get_free_slots,
-	.irqhandler = ccp5_irq_handler,
 };
 
 const struct ccp_vdata ccpv5a = {
 	.version = CCP_VERSION(5, 0),
 	.setup = ccp5_config,
 	.perform = &ccp5_actions,
-	.bar = 2,
 	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
 };
 
 const struct ccp_vdata ccpv5b = {
@@ -1122,6 +1122,6 @@ const struct ccp_vdata ccpv5b = {
 	.dma_chan_attr = DMA_PRIVATE,
 	.setup = ccp5other_config,
 	.perform = &ccp5_actions,
-	.bar = 2,
 	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
 };
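
Note the rounding fix in ccp5_perform_rsa(): (mod_size + 7) >> 3 rounds the bit width up to whole bytes, where the old mod_size >> 3 truncated. A 2048-bit modulus yields 256 bytes either way, but a hypothetical 2049-bit one now correctly yields 257 rather than 256.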

+ 84 - 50
drivers/crypto/ccp/ccp-dev.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -11,7 +11,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
@@ -30,12 +29,6 @@
 
 #include "ccp-dev.h"
 
-MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
-MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.1.0");
-MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
-
 struct ccp_tasklet_data {
 	struct completion completion;
 	struct ccp_cmd *cmd;
@@ -111,13 +104,6 @@ static LIST_HEAD(ccp_units);
 static DEFINE_SPINLOCK(ccp_rr_lock);
 static struct ccp_device *ccp_rr;
 
-/* Ever-increasing value to produce unique unit numbers */
-static atomic_t ccp_unit_ordinal;
-static unsigned int ccp_increment_unit_ordinal(void)
-{
-	return atomic_inc_return(&ccp_unit_ordinal);
-}
-
 /**
  * ccp_add_device - add a CCP device to the list
  *
@@ -415,6 +401,7 @@ static void ccp_do_cmd_complete(unsigned long data)
 	struct ccp_cmd *cmd = tdata->cmd;
 
 	cmd->callback(cmd->data, cmd->ret);
+
 	complete(&tdata->completion);
 }
 
@@ -464,14 +451,17 @@ int ccp_cmd_queue_thread(void *data)
  *
  * @dev: device struct of the CCP
  */
-struct ccp_device *ccp_alloc_struct(struct device *dev)
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp)
 {
+	struct device *dev = sp->dev;
 	struct ccp_device *ccp;
 
 	ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
 	if (!ccp)
 		return NULL;
 	ccp->dev = dev;
+	ccp->sp = sp;
+	ccp->axcache = sp->axcache;
 
 	INIT_LIST_HEAD(&ccp->cmd);
 	INIT_LIST_HEAD(&ccp->backlog);
@@ -486,9 +476,8 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 	init_waitqueue_head(&ccp->sb_queue);
 	init_waitqueue_head(&ccp->suspend_queue);
 
-	ccp->ord = ccp_increment_unit_ordinal();
-	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
-	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord);
+	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", sp->ord);
+	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", sp->ord);
 
 	return ccp;
 }
@@ -538,55 +527,100 @@ bool ccp_queues_suspended(struct ccp_device *ccp)
 
 	return ccp->cmd_q_count == suspended;
 }
-#endif
 
-static int __init ccp_mod_init(void)
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
 {
-#ifdef CONFIG_X86
-	int ret;
+	struct ccp_device *ccp = sp->ccp_data;
+	unsigned long flags;
+	unsigned int i;
 
-	ret = ccp_pci_init();
-	if (ret)
-		return ret;
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		ccp_pci_exit();
-		return -ENODEV;
+	ccp->suspending = 1;
+
+	/* Wake all the queue kthreads to prepare for suspend */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		wake_up_process(ccp->cmd_q[i].kthread);
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+int ccp_dev_resume(struct sp_device *sp)
+{
+	struct ccp_device *ccp = sp->ccp_data;
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&ccp->cmd_lock, flags);
+
+	ccp->suspending = 0;
+
+	/* Wake up all the kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].suspended = 0;
+		wake_up_process(ccp->cmd_q[i].kthread);
 	}
 
+	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
+
 	return 0;
+}
 #endif
 
-#ifdef CONFIG_ARM64
+int ccp_dev_init(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct ccp_device *ccp;
 	int ret;
 
-	ret = ccp_platform_init();
+	ret = -ENOMEM;
+	ccp = ccp_alloc_struct(sp);
+	if (!ccp)
+		goto e_err;
+	sp->ccp_data = ccp;
+
+	ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata;
+	if (!ccp->vdata || !ccp->vdata->version) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ccp->use_tasklet = sp->use_tasklet;
+
+	ccp->io_regs = sp->io_map + ccp->vdata->offset;
+	if (ccp->vdata->setup)
+		ccp->vdata->setup(ccp);
+
+	ret = ccp->vdata->perform->init(ccp);
 	if (ret)
-		return ret;
+		goto e_err;
 
-	/* Don't leave the driver loaded if init failed */
-	if (ccp_present() != 0) {
-		ccp_platform_exit();
-		return -ENODEV;
-	}
+	dev_notice(dev, "ccp enabled\n");
 
 	return 0;
-#endif
 
-	return -ENODEV;
+e_err:
+	sp->ccp_data = NULL;
+
+	dev_notice(dev, "ccp initialization failed\n");
+
+	return ret;
 }
 
-static void __exit ccp_mod_exit(void)
+void ccp_dev_destroy(struct sp_device *sp)
 {
-#ifdef CONFIG_X86
-	ccp_pci_exit();
-#endif
+	struct ccp_device *ccp = sp->ccp_data;
 
-#ifdef CONFIG_ARM64
-	ccp_platform_exit();
-#endif
-}
+	if (!ccp)
+		return;
 
-module_init(ccp_mod_init);
-module_exit(ccp_mod_exit);
+	ccp->vdata->perform->destroy(ccp);
+}

+ 9 - 21
drivers/crypto/ccp/ccp-dev.h

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -27,6 +27,8 @@
 #include <linux/irqreturn.h>
 #include <linux/dmaengine.h>
 
+#include "sp-dev.h"
+
 #define MAX_CCP_NAME_LEN		16
 #define MAX_DMAPOOL_NAME_LEN		32
 
@@ -192,6 +194,7 @@
 #define CCP_AES_CTX_SB_COUNT		1
 
 #define CCP_XTS_AES_KEY_SB_COUNT	1
+#define CCP5_XTS_AES_KEY_SB_COUNT	2
 #define CCP_XTS_AES_CTX_SB_COUNT	1
 
 #define CCP_DES3_KEY_SB_COUNT		1
@@ -200,6 +203,7 @@
 #define CCP_SHA_SB_COUNT		1
 
 #define CCP_RSA_MAX_WIDTH		4096
+#define CCP5_RSA_MAX_WIDTH		16384
 
 #define CCP_PASSTHRU_BLOCKSIZE		256
 #define CCP_PASSTHRU_MASKSIZE		32
@@ -344,12 +348,11 @@ struct ccp_device {
 	char rngname[MAX_CCP_NAME_LEN];
 
 	struct device *dev;
+	struct sp_device *sp;
 
 	/* Bus specific device information
 	 */
 	void *dev_specific;
-	int (*get_irq)(struct ccp_device *ccp);
-	void (*free_irq)(struct ccp_device *ccp);
 	unsigned int qim;
 	unsigned int irq;
 	bool use_tasklet;
@@ -362,7 +365,6 @@ struct ccp_device {
 	 *   them.
 	 */
 	struct mutex req_mutex ____cacheline_aligned;
-	void __iomem *io_map;
 	void __iomem *io_regs;
 
 	/* Master lists that all cmds are queued on. Because there can be
@@ -497,6 +499,7 @@ struct ccp_aes_op {
 };
 
 struct ccp_xts_aes_op {
+	enum ccp_aes_type type;
 	enum ccp_aes_action action;
 	enum ccp_xts_aes_unit_size unit_size;
 };
@@ -626,18 +629,12 @@ struct ccp5_desc {
 	struct dword7 dw7;
 };
 
-int ccp_pci_init(void);
-void ccp_pci_exit(void);
-
-int ccp_platform_init(void);
-void ccp_platform_exit(void);
-
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 
 extern void ccp_log_error(struct ccp_device *, int);
 
-struct ccp_device *ccp_alloc_struct(struct device *dev);
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 int ccp_cmd_queue_thread(void *data);
 int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
@@ -669,16 +666,7 @@ struct ccp_actions {
 	irqreturn_t (*irqhandler)(int, void *);
 };
 
-/* Structure to hold CCP version-specific values */
-struct ccp_vdata {
-	const unsigned int version;
-	const unsigned int dma_chan_attr;
-	void (*setup)(struct ccp_device *);
-	const struct ccp_actions *perform;
-	const unsigned int bar;
-	const unsigned int offset;
-};
-
+extern const struct ccp_vdata ccpv3_platform;
 extern const struct ccp_vdata ccpv3;
 extern const struct ccp_vdata ccpv5a;
 extern const struct ccp_vdata ccpv5b;

+ 1 - 1
drivers/crypto/ccp/ccp-dmaengine.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  *

+ 91 - 42
drivers/crypto/ccp/ccp-ops.c

@@ -1,7 +1,7 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
@@ -168,7 +168,7 @@ static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
 
 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
 						 dir);
-		if (!wa->dma.address)
+		if (dma_mapping_error(wa->dev, wa->dma.address))
 			return -ENOMEM;
 
 		wa->dma.length = len;
@@ -1038,6 +1038,8 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	struct ccp_op op;
 	unsigned int unit_size, dm_offset;
 	bool in_place = false;
+	unsigned int sb_count;
+	enum ccp_aes_type aestype;
 	int ret;
 
 	switch (xts->unit_size) {
@@ -1061,7 +1063,11 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 		return -EINVAL;
 	}
 
-	if (xts->key_len != AES_KEYSIZE_128)
+	if (xts->key_len == AES_KEYSIZE_128)
+		aestype = CCP_AES_TYPE_128;
+	else if (xts->key_len == AES_KEYSIZE_256)
+		aestype = CCP_AES_TYPE_256;
+	else
 		return -EINVAL;
 
 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
@@ -1083,23 +1089,44 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	op.sb_key = cmd_q->sb_key;
 	op.sb_ctx = cmd_q->sb_ctx;
 	op.init = 1;
+	op.u.xts.type = aestype;
 	op.u.xts.action = xts->action;
 	op.u.xts.unit_size = xts->unit_size;
 
-	/* All supported key sizes fit in a single (32-byte) SB entry
-	 * and must be in little endian format. Use the 256-bit byte
-	 * swap passthru option to convert from big endian to little
-	 * endian.
+	/* A version 3 device only supports 128-bit keys, which fit into a
+	 * single SB entry. A version 5 device uses a 512-bit vector, which
+	 * needs two SB entries.
 	 */
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
+	else
+		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
 	ret = ccp_init_dm_workarea(&key, cmd_q,
-				   CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
+				   sb_count * CCP_SB_BYTES,
 				   DMA_TO_DEVICE);
 	if (ret)
 		return ret;
 
-	dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
-	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
-	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
+		/* All supported key sizes must be in little endian format.
+		 * Use the 256-bit byte swap passthru option to convert from
+		 * big endian to little endian.
+		 */
+		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
+		ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
+		ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
+	} else {
+		/* Version 5 CCPs use a 512-bit space for the key: each portion
+		 * occupies 256 bits, or one entire slot, and is zero-padded.
+		 */
+		unsigned int pad;
+
+		dm_offset = CCP_SB_BYTES;
+		pad = dm_offset - xts->key_len;
+		ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
+		ccp_set_dm_area(&key, dm_offset + pad, xts->key, xts->key_len,
+				xts->key_len);
+	}
 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
 			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
@@ -1731,42 +1758,53 @@ e_ctx:
 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
-	struct ccp_dm_workarea exp, src;
-	struct ccp_data dst;
+	struct ccp_dm_workarea exp, src, dst;
 	struct ccp_op op;
 	unsigned int sb_count, i_len, o_len;
 	int ret;
 
-	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
+	/* Check against the maximum allowable size, in bits */
+	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
 		return -EINVAL;
 
 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
 		return -EINVAL;
 
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
 	/* The RSA modulus must precede the message being acted upon, so
 	 * it must be copied to a DMA area where the message and the
 	 * modulus can be concatenated.  Therefore the input buffer
 	 * length required is twice the output buffer length (which
-	 * must be a multiple of 256-bits).
+	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
+	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
+	 * required.
 	 */
-	o_len = ((rsa->key_size + 255) / 256) * 32;
+	o_len = 32 * ((rsa->key_size + 255) / 256);
 	i_len = o_len * 2;
 
-	sb_count = o_len / CCP_SB_BYTES;
-
-	memset(&op, 0, sizeof(op));
-	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
-
-	if (!op.sb_key)
-		return -EIO;
+	sb_count = 0;
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* sb_count is the number of storage block slots required
+		 * for the modulus.
+		 */
+		sb_count = o_len / CCP_SB_BYTES;
+		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
+								sb_count);
+		if (!op.sb_key)
+			return -EIO;
+	} else {
+		/* A version 5 device allows a modulus size that will not fit
+		 * in the LSB, so the command will transfer it from memory.
+		 * Set the sb key to the default, even though it's not used.
+		 */
+		op.sb_key = cmd_q->sb_key;
+	}
 
-	/* The RSA exponent may span multiple (32-byte) SB entries and must
-	 * be in little endian format. Reverse copy each 32-byte chunk
-	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
-	 * and each byte within that chunk and do not perform any byte swap
-	 * operations on the passthru operation.
+	/* The RSA exponent must be in little endian format. Reverse its
+	 * byte order.
 	 */
 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
 	if (ret)
@@ -1775,11 +1813,22 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
 	if (ret)
 		goto e_exp;
-	ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
-			     CCP_PASSTHRU_BYTESWAP_NOOP);
-	if (ret) {
-		cmd->engine_error = cmd_q->cmd_error;
-		goto e_exp;
+
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* Copy the exponent to the local storage block, using
+		 * as many 32-byte blocks as were allocated above. It's
+		 * already little endian, so no further change is required.
+		 */
+		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_exp;
+		}
+	} else {
+		/* The exponent can be retrieved from memory via DMA. */
+		op.exp.u.dma.address = exp.dma.address;
+		op.exp.u.dma.offset = 0;
 	}
 
 	/* Concatenate the modulus and the message. Both the modulus and
@@ -1798,8 +1847,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_src;
 
 	/* Prepare the output area for the operation */
-	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
-			    o_len, DMA_FROM_DEVICE);
+	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
 	if (ret)
 		goto e_src;
 
@@ -1807,7 +1855,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	op.src.u.dma.address = src.dma.address;
 	op.src.u.dma.offset = 0;
 	op.src.u.dma.length = i_len;
-	op.dst.u.dma.address = dst.dm_wa.dma.address;
+	op.dst.u.dma.address = dst.dma.address;
 	op.dst.u.dma.offset = 0;
 	op.dst.u.dma.length = o_len;
 
@@ -1820,10 +1868,10 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_dst;
 	}
 
-	ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len);
+	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
 
 e_dst:
-	ccp_free_data(&dst, cmd_q);
+	ccp_dm_free(&dst);
 
 e_src:
 	ccp_dm_free(&src);
@@ -1832,7 +1880,8 @@ e_exp:
 	ccp_dm_free(&exp);
 
 e_sb:
-	cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
+	if (sb_count)
+		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
 	return ret;
 }
@@ -1992,7 +2041,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 		/* Load the mask */
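
The dma_map_single() fix at the top of this file is worth generalising: a bus address of 0 can be valid, and failures are signalled by an architecture-specific cookie, so success must be tested with dma_mapping_error() rather than a NULL/zero check:

	dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;	/* never compare addr against 0 */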

+ 0 - 356
drivers/crypto/ccp/ccp-pci.c

@@ -1,356 +0,0 @@
-/*
- * AMD Cryptographic Coprocessor (CCP) driver
- *
- * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
- *
- * Author: Tom Lendacky <thomas.lendacky@amd.com>
- * Author: Gary R Hook <gary.hook@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/pci_ids.h>
-#include <linux/dma-mapping.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/ccp.h>
-
-#include "ccp-dev.h"
-
-#define MSIX_VECTORS			2
-
-struct ccp_msix {
-	u32 vector;
-	char name[16];
-};
-
-struct ccp_pci {
-	int msix_count;
-	struct ccp_msix msix[MSIX_VECTORS];
-};
-
-static int ccp_get_msix_irqs(struct ccp_device *ccp)
-{
-	struct ccp_pci *ccp_pci = ccp->dev_specific;
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct msix_entry msix_entry[MSIX_VECTORS];
-	unsigned int name_len = sizeof(ccp_pci->msix[0].name) - 1;
-	int v, ret;
-
-	for (v = 0; v < ARRAY_SIZE(msix_entry); v++)
-		msix_entry[v].entry = v;
-
-	ret = pci_enable_msix_range(pdev, msix_entry, 1, v);
-	if (ret < 0)
-		return ret;
-
-	ccp_pci->msix_count = ret;
-	for (v = 0; v < ccp_pci->msix_count; v++) {
-		/* Set the interrupt names and request the irqs */
-		snprintf(ccp_pci->msix[v].name, name_len, "%s-%u",
-			 ccp->name, v);
-		ccp_pci->msix[v].vector = msix_entry[v].vector;
-		ret = request_irq(ccp_pci->msix[v].vector,
-				  ccp->vdata->perform->irqhandler,
-				  0, ccp_pci->msix[v].name, dev);
-		if (ret) {
-			dev_notice(dev, "unable to allocate MSI-X IRQ (%d)\n",
-				   ret);
-			goto e_irq;
-		}
-	}
-	ccp->use_tasklet = true;
-
-	return 0;
-
-e_irq:
-	while (v--)
-		free_irq(ccp_pci->msix[v].vector, dev);
-
-	pci_disable_msix(pdev);
-
-	ccp_pci->msix_count = 0;
-
-	return ret;
-}
-
-static int ccp_get_msi_irq(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int ret;
-
-	ret = pci_enable_msi(pdev);
-	if (ret)
-		return ret;
-
-	ccp->irq = pdev->irq;
-	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, dev);
-	if (ret) {
-		dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
-		goto e_msi;
-	}
-	ccp->use_tasklet = true;
-
-	return 0;
-
-e_msi:
-	pci_disable_msi(pdev);
-
-	return ret;
-}
-
-static int ccp_get_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	int ret;
-
-	ret = ccp_get_msix_irqs(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get MSI-X vectors, try MSI */
-	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
-	ret = ccp_get_msi_irq(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get MSI interrupt */
-	dev_notice(dev, "could not enable MSI (%d)\n", ret);
-
-	return ret;
-}
-
-static void ccp_free_irqs(struct ccp_device *ccp)
-{
-	struct ccp_pci *ccp_pci = ccp->dev_specific;
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-
-	if (ccp_pci->msix_count) {
-		while (ccp_pci->msix_count--)
-			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
-				 dev);
-		pci_disable_msix(pdev);
-	} else if (ccp->irq) {
-		free_irq(ccp->irq, dev);
-		pci_disable_msi(pdev);
-	}
-	ccp->irq = 0;
-}
-
-static int ccp_find_mmio_area(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct pci_dev *pdev = to_pci_dev(dev);
-	resource_size_t io_len;
-	unsigned long io_flags;
-
-	io_flags = pci_resource_flags(pdev, ccp->vdata->bar);
-	io_len = pci_resource_len(pdev, ccp->vdata->bar);
-	if ((io_flags & IORESOURCE_MEM) &&
-	    (io_len >= (ccp->vdata->offset + 0x800)))
-		return ccp->vdata->bar;
-
-	return -EIO;
-}
-
-static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-	struct ccp_device *ccp;
-	struct ccp_pci *ccp_pci;
-	struct device *dev = &pdev->dev;
-	unsigned int bar;
-	int ret;
-
-	ret = -ENOMEM;
-	ccp = ccp_alloc_struct(dev);
-	if (!ccp)
-		goto e_err;
-
-	ccp_pci = devm_kzalloc(dev, sizeof(*ccp_pci), GFP_KERNEL);
-	if (!ccp_pci)
-		goto e_err;
-
-	ccp->dev_specific = ccp_pci;
-	ccp->vdata = (struct ccp_vdata *)id->driver_data;
-	if (!ccp->vdata || !ccp->vdata->version) {
-		ret = -ENODEV;
-		dev_err(dev, "missing driver data\n");
-		goto e_err;
-	}
-	ccp->get_irq = ccp_get_irqs;
-	ccp->free_irq = ccp_free_irqs;
-
-	ret = pci_request_regions(pdev, "ccp");
-	if (ret) {
-		dev_err(dev, "pci_request_regions failed (%d)\n", ret);
-		goto e_err;
-	}
-
-	ret = pci_enable_device(pdev);
-	if (ret) {
-		dev_err(dev, "pci_enable_device failed (%d)\n", ret);
-		goto e_regions;
-	}
-
-	pci_set_master(pdev);
-
-	ret = ccp_find_mmio_area(ccp);
-	if (ret < 0)
-		goto e_device;
-	bar = ret;
-
-	ret = -EIO;
-	ccp->io_map = pci_iomap(pdev, bar, 0);
-	if (!ccp->io_map) {
-		dev_err(dev, "pci_iomap failed\n");
-		goto e_device;
-	}
-	ccp->io_regs = ccp->io_map + ccp->vdata->offset;
-
-	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
-	if (ret) {
-		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-		if (ret) {
-			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
-				ret);
-			goto e_iomap;
-		}
-	}
-
-	dev_set_drvdata(dev, ccp);
-
-	if (ccp->vdata->setup)
-		ccp->vdata->setup(ccp);
-
-	ret = ccp->vdata->perform->init(ccp);
-	if (ret)
-		goto e_iomap;
-
-	dev_notice(dev, "enabled\n");
-
-	return 0;
-
-e_iomap:
-	pci_iounmap(pdev, ccp->io_map);
-
-e_device:
-	pci_disable_device(pdev);
-
-e_regions:
-	pci_release_regions(pdev);
-
-e_err:
-	dev_notice(dev, "initialization failed\n");
-	return ret;
-}
-
-static void ccp_pci_remove(struct pci_dev *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-
-	if (!ccp)
-		return;
-
-	ccp->vdata->perform->destroy(ccp);
-
-	pci_iounmap(pdev, ccp->io_map);
-
-	pci_disable_device(pdev);
-
-	pci_release_regions(pdev);
-
-	dev_notice(dev, "disabled\n");
-}
-
-#ifdef CONFIG_PM
-static int ccp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 1;
-
-	/* Wake all the queue kthreads to prepare for suspend */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		wake_up_process(ccp->cmd_q[i].kthread);
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	/* Wait for all queue kthreads to say they're done */
-	while (!ccp_queues_suspended(ccp))
-		wait_event_interruptible(ccp->suspend_queue,
-					 ccp_queues_suspended(ccp));
-
-	return 0;
-}
-
-static int ccp_pci_resume(struct pci_dev *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 0;
-
-	/* Wake up all the kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		ccp->cmd_q[i].suspended = 0;
-		wake_up_process(ccp->cmd_q[i].kthread);
-	}
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	return 0;
-}
-#endif
-
-static const struct pci_device_id ccp_pci_table[] = {
-	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
-	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a },
-	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b },
-	/* Last entry must be zero */
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, ccp_pci_table);
-
-static struct pci_driver ccp_pci_driver = {
-	.name = "ccp",
-	.id_table = ccp_pci_table,
-	.probe = ccp_pci_probe,
-	.remove = ccp_pci_remove,
-#ifdef CONFIG_PM
-	.suspend = ccp_pci_suspend,
-	.resume = ccp_pci_resume,
-#endif
-};
-
-int ccp_pci_init(void)
-{
-	return pci_register_driver(&ccp_pci_driver);
-}
-
-void ccp_pci_exit(void)
-{
-	pci_unregister_driver(&ccp_pci_driver);
-}

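The file deleted above managed PCI state by hand, which is why ccp_pci_probe() needed the e_iomap/e_device/e_regions unwind ladder. Its replacement, sp-pci.c later in this diff, switches to device-managed (pcim_*) resources, where the PCI core releases everything on probe failure or removal. A sketch of that pattern in a hypothetical driver; the demo_* names and the 0xffff device ID are invented:

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/bitops.h>

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	void __iomem *regs;
	int ret;

	ret = pcim_enable_device(pdev);		/* auto-disabled on failure */
	if (ret)
		return ret;

	ret = pcim_iomap_regions(pdev, BIT(0), "demo");
	if (ret)
		return ret;			/* no unwind labels needed */

	regs = pcim_iomap_table(pdev)[0];
	if (!regs)
		return -ENOMEM;

	dev_info(&pdev->dev, "BAR0 mapped at %p\n", regs);
	return 0;
}

static const struct pci_device_id demo_ids[] = {
	{ PCI_VDEVICE(AMD, 0xffff) },		/* invented ID */
	{ 0, }
};

static struct pci_driver demo_driver = {
	.name = "demo",
	.id_table = demo_ids,
	.probe = demo_probe,
};
module_pci_driver(demo_driver);
MODULE_LICENSE("GPL");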
+ 0 - 293
drivers/crypto/ccp/ccp-platform.c

@@ -1,293 +0,0 @@
-/*
- * AMD Cryptographic Coprocessor (CCP) driver
- *
- * Copyright (C) 2014,2016 Advanced Micro Devices, Inc.
- *
- * Author: Tom Lendacky <thomas.lendacky@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/ioport.h>
-#include <linux/dma-mapping.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/ccp.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/acpi.h>
-
-#include "ccp-dev.h"
-
-struct ccp_platform {
-	int coherent;
-};
-
-static const struct acpi_device_id ccp_acpi_match[];
-static const struct of_device_id ccp_of_match[];
-
-static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev)
-{
-#ifdef CONFIG_OF
-	const struct of_device_id *match;
-
-	match = of_match_node(ccp_of_match, pdev->dev.of_node);
-	if (match && match->data)
-		return (struct ccp_vdata *)match->data;
-#endif
-	return NULL;
-}
-
-static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
-{
-#ifdef CONFIG_ACPI
-	const struct acpi_device_id *match;
-
-	match = acpi_match_device(ccp_acpi_match, &pdev->dev);
-	if (match && match->driver_data)
-		return (struct ccp_vdata *)match->driver_data;
-#endif
-	return NULL;
-}
-
-static int ccp_get_irq(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct platform_device *pdev = to_platform_device(dev);
-	int ret;
-
-	ret = platform_get_irq(pdev, 0);
-	if (ret < 0)
-		return ret;
-
-	ccp->irq = ret;
-	ret = request_irq(ccp->irq, ccp->vdata->perform->irqhandler, 0,
-			  ccp->name, dev);
-	if (ret) {
-		dev_notice(dev, "unable to allocate IRQ (%d)\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int ccp_get_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	int ret;
-
-	ret = ccp_get_irq(ccp);
-	if (!ret)
-		return 0;
-
-	/* Couldn't get an interrupt */
-	dev_notice(dev, "could not enable interrupts (%d)\n", ret);
-
-	return ret;
-}
-
-static void ccp_free_irqs(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-
-	free_irq(ccp->irq, dev);
-}
-
-static struct resource *ccp_find_mmio_area(struct ccp_device *ccp)
-{
-	struct device *dev = ccp->dev;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct resource *ior;
-
-	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (ior && (resource_size(ior) >= 0x800))
-		return ior;
-
-	return NULL;
-}
-
-static int ccp_platform_probe(struct platform_device *pdev)
-{
-	struct ccp_device *ccp;
-	struct ccp_platform *ccp_platform;
-	struct device *dev = &pdev->dev;
-	enum dev_dma_attr attr;
-	struct resource *ior;
-	int ret;
-
-	ret = -ENOMEM;
-	ccp = ccp_alloc_struct(dev);
-	if (!ccp)
-		goto e_err;
-
-	ccp_platform = devm_kzalloc(dev, sizeof(*ccp_platform), GFP_KERNEL);
-	if (!ccp_platform)
-		goto e_err;
-
-	ccp->dev_specific = ccp_platform;
-	ccp->vdata = pdev->dev.of_node ? ccp_get_of_version(pdev)
-					 : ccp_get_acpi_version(pdev);
-	if (!ccp->vdata || !ccp->vdata->version) {
-		ret = -ENODEV;
-		dev_err(dev, "missing driver data\n");
-		goto e_err;
-	}
-	ccp->get_irq = ccp_get_irqs;
-	ccp->free_irq = ccp_free_irqs;
-
-	ior = ccp_find_mmio_area(ccp);
-	ccp->io_map = devm_ioremap_resource(dev, ior);
-	if (IS_ERR(ccp->io_map)) {
-		ret = PTR_ERR(ccp->io_map);
-		goto e_err;
-	}
-	ccp->io_regs = ccp->io_map;
-
-	attr = device_get_dma_attr(dev);
-	if (attr == DEV_DMA_NOT_SUPPORTED) {
-		dev_err(dev, "DMA is not supported");
-		goto e_err;
-	}
-
-	ccp_platform->coherent = (attr == DEV_DMA_COHERENT);
-	if (ccp_platform->coherent)
-		ccp->axcache = CACHE_WB_NO_ALLOC;
-	else
-		ccp->axcache = CACHE_NONE;
-
-	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
-	if (ret) {
-		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
-		goto e_err;
-	}
-
-	dev_set_drvdata(dev, ccp);
-
-	ret = ccp->vdata->perform->init(ccp);
-	if (ret)
-		goto e_err;
-
-	dev_notice(dev, "enabled\n");
-
-	return 0;
-
-e_err:
-	dev_notice(dev, "initialization failed\n");
-	return ret;
-}
-
-static int ccp_platform_remove(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-
-	ccp->vdata->perform->destroy(ccp);
-
-	dev_notice(dev, "disabled\n");
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int ccp_platform_suspend(struct platform_device *pdev,
-				pm_message_t state)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 1;
-
-	/* Wake all the queue kthreads to prepare for suspend */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		wake_up_process(ccp->cmd_q[i].kthread);
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	/* Wait for all queue kthreads to say they're done */
-	while (!ccp_queues_suspended(ccp))
-		wait_event_interruptible(ccp->suspend_queue,
-					 ccp_queues_suspended(ccp));
-
-	return 0;
-}
-
-static int ccp_platform_resume(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct ccp_device *ccp = dev_get_drvdata(dev);
-	unsigned long flags;
-	unsigned int i;
-
-	spin_lock_irqsave(&ccp->cmd_lock, flags);
-
-	ccp->suspending = 0;
-
-	/* Wake up all the kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		ccp->cmd_q[i].suspended = 0;
-		wake_up_process(ccp->cmd_q[i].kthread);
-	}
-
-	spin_unlock_irqrestore(&ccp->cmd_lock, flags);
-
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id ccp_acpi_match[] = {
-	{ "AMDI0C00", (kernel_ulong_t)&ccpv3 },
-	{ },
-};
-MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id ccp_of_match[] = {
-	{ .compatible = "amd,ccp-seattle-v1a",
-	  .data = (const void *)&ccpv3 },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, ccp_of_match);
-#endif
-
-static struct platform_driver ccp_platform_driver = {
-	.driver = {
-		.name = "ccp",
-#ifdef CONFIG_ACPI
-		.acpi_match_table = ccp_acpi_match,
-#endif
-#ifdef CONFIG_OF
-		.of_match_table = ccp_of_match,
-#endif
-	},
-	.probe = ccp_platform_probe,
-	.remove = ccp_platform_remove,
-#ifdef CONFIG_PM
-	.suspend = ccp_platform_suspend,
-	.resume = ccp_platform_resume,
-#endif
-};
-
-int ccp_platform_init(void)
-{
-	return platform_driver_register(&ccp_platform_driver);
-}
-
-void ccp_platform_exit(void)
-{
-	platform_driver_unregister(&ccp_platform_driver);
-}

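ccp-pci.c and the ccp-platform.c file deleted above each carried an identical copy of the suspend handshake: flag the device as suspending, wake every command-queue kthread, then wait until ccp_queues_suspended() reports them all parked. In the new layout that logic lives once behind sp_suspend()/ccp_dev_suspend(). A compact user-space model of the handshake, with pthreads standing in for the kthread and wait-queue machinery:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool suspending;			/* models ccp->suspending */
static bool suspended;			/* models cmd_q->suspended */

static void *queue_worker(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!suspending)		/* would be processing commands */
		pthread_cond_wait(&cond, &lock);
	suspended = true;		/* queue reports it has parked */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, queue_worker, NULL);

	pthread_mutex_lock(&lock);
	suspending = true;		/* ccp->suspending = 1 */
	pthread_cond_broadcast(&cond);	/* wake_up_process() */
	while (!suspended)		/* !ccp_queues_suspended() */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("all queues suspended");
	return 0;
}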
+ 277 - 0
drivers/crypto/ccp/sp-dev.c

@@ -0,0 +1,277 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+#include "sp-dev.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.1.0");
+MODULE_DESCRIPTION("AMD Secure Processor driver");
+
+/* List of SPs, read-write access lock, and access functions
+ *
+ * Lock structure: get sp_unit_lock for reading whenever we need to
+ * examine the SP list.
+ */
+static DEFINE_RWLOCK(sp_unit_lock);
+static LIST_HEAD(sp_units);
+
+/* Ever-increasing value to produce unique unit numbers */
+static atomic_t sp_ordinal;
+
+static void sp_add_device(struct sp_device *sp)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&sp_unit_lock, flags);
+
+	list_add_tail(&sp->entry, &sp_units);
+
+	write_unlock_irqrestore(&sp_unit_lock, flags);
+}
+
+static void sp_del_device(struct sp_device *sp)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&sp_unit_lock, flags);
+
+	list_del(&sp->entry);
+
+	write_unlock_irqrestore(&sp_unit_lock, flags);
+}
+
+static irqreturn_t sp_irq_handler(int irq, void *data)
+{
+	struct sp_device *sp = data;
+
+	if (sp->ccp_irq_handler)
+		sp->ccp_irq_handler(irq, sp->ccp_irq_data);
+
+	if (sp->psp_irq_handler)
+		sp->psp_irq_handler(irq, sp->psp_irq_data);
+
+	return IRQ_HANDLED;
+}
+
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	int ret;
+
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) {
+		/* Need a common routine to manage all interrupts */
+		sp->ccp_irq_data = data;
+		sp->ccp_irq_handler = handler;
+
+		if (!sp->irq_registered) {
+			ret = request_irq(sp->ccp_irq, sp_irq_handler, 0,
+					  sp->name, sp);
+			if (ret)
+				return ret;
+
+			sp->irq_registered = true;
+		}
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		ret = request_irq(sp->ccp_irq, handler, 0, name, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	int ret;
+
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) {
+		/* Need a common routine to manage all interrupts */
+		sp->psp_irq_data = data;
+		sp->psp_irq_handler = handler;
+
+		if (!sp->irq_registered) {
+			ret = request_irq(sp->psp_irq, sp_irq_handler, 0,
+					  sp->name, sp);
+			if (ret)
+				return ret;
+
+			sp->irq_registered = true;
+		}
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		ret = request_irq(sp->psp_irq, handler, 0, name, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void sp_free_ccp_irq(struct sp_device *sp, void *data)
+{
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) {
+		/* Using common routine to manage all interrupts */
+		if (!sp->psp_irq_handler) {
+			/* Nothing else using it, so free it */
+			free_irq(sp->ccp_irq, sp);
+
+			sp->irq_registered = false;
+		}
+
+		sp->ccp_irq_handler = NULL;
+		sp->ccp_irq_data = NULL;
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		free_irq(sp->ccp_irq, data);
+	}
+}
+
+void sp_free_psp_irq(struct sp_device *sp, void *data)
+{
+	if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) {
+		/* Using common routine to manage all interrupts */
+		if (!sp->ccp_irq_handler) {
+			/* Nothing else using it, so free it */
+			free_irq(sp->psp_irq, sp);
+
+			sp->irq_registered = false;
+		}
+
+		sp->psp_irq_handler = NULL;
+		sp->psp_irq_data = NULL;
+	} else {
+		/* Each sub-device can manage its own interrupt */
+		free_irq(sp->psp_irq, data);
+	}
+}
+
+/**
+ * sp_alloc_struct - allocate and initialize the sp_device struct
+ *
+ * @dev: device struct of the SP
+ */
+struct sp_device *sp_alloc_struct(struct device *dev)
+{
+	struct sp_device *sp;
+
+	sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL);
+	if (!sp)
+		return NULL;
+
+	sp->dev = dev;
+	sp->ord = atomic_inc_return(&sp_ordinal);
+	snprintf(sp->name, SP_MAX_NAME_LEN, "sp-%u", sp->ord);
+
+	return sp;
+}
+
+int sp_init(struct sp_device *sp)
+{
+	sp_add_device(sp);
+
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_init(sp);
+
+	return 0;
+}
+
+void sp_destroy(struct sp_device *sp)
+{
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_destroy(sp);
+
+	sp_del_device(sp);
+}
+
+#ifdef CONFIG_PM
+int sp_suspend(struct sp_device *sp, pm_message_t state)
+{
+	int ret;
+
+	if (sp->dev_vdata->ccp_vdata) {
+		ret = ccp_dev_suspend(sp, state);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int sp_resume(struct sp_device *sp)
+{
+	int ret;
+
+	if (sp->dev_vdata->ccp_vdata) {
+		ret = ccp_dev_resume(sp);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static int __init sp_mod_init(void)
+{
+#ifdef CONFIG_X86
+	int ret;
+
+	ret = sp_pci_init();
+	if (ret)
+		return ret;
+
+	return 0;
+#endif
+
+#ifdef CONFIG_ARM64
+	int ret;
+
+	ret = sp_platform_init();
+	if (ret)
+		return ret;
+
+	return 0;
+#endif
+
+	return -ENODEV;
+}
+
+static void __exit sp_mod_exit(void)
+{
+#ifdef CONFIG_X86
+	sp_pci_exit();
+#endif
+
+#ifdef CONFIG_ARM64
+	sp_platform_exit();
+#endif
+}
+
+module_init(sp_mod_init);
+module_exit(sp_mod_exit);

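A hypothetical sub-device wiring itself into the shared interrupt plumbing defined above: when firmware gives PSP and CCP the same vector, sp_request_ccp_irq() installs sp_irq_handler() once and fans out to whichever sub-device handlers are registered; otherwise it falls through to a plain request_irq() on the CCP vector. The ccp_demo_* names are invented:

#include <linux/interrupt.h>

#include "sp-dev.h"

static irqreturn_t ccp_demo_irq(int irq, void *data)
{
	struct sp_device *sp = data;

	dev_dbg(sp->dev, "ccp sub-device interrupt\n");
	return IRQ_HANDLED;
}

static int ccp_demo_init(struct sp_device *sp)
{
	return sp_request_ccp_irq(sp, ccp_demo_irq, sp->name, sp);
}

static void ccp_demo_destroy(struct sp_device *sp)
{
	sp_free_ccp_irq(sp, sp);
}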
+ 133 - 0
drivers/crypto/ccp/sp-dev.h

@@ -0,0 +1,133 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __SP_DEV_H__
+#define __SP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+
+#define SP_MAX_NAME_LEN		32
+
+#define CACHE_NONE			0x00
+#define CACHE_WB_NO_ALLOC		0xb7
+
+/* Structure to hold CCP device data */
+struct ccp_device;
+struct ccp_vdata {
+	const unsigned int version;
+	const unsigned int dma_chan_attr;
+	void (*setup)(struct ccp_device *);
+	const struct ccp_actions *perform;
+	const unsigned int offset;
+	const unsigned int rsamax;
+};
+/* Structure to hold SP device data */
+struct sp_dev_vdata {
+	const unsigned int bar;
+
+	const struct ccp_vdata *ccp_vdata;
+	void *psp_vdata;
+};
+
+struct sp_device {
+	struct list_head entry;
+
+	struct device *dev;
+
+	struct sp_dev_vdata *dev_vdata;
+	unsigned int ord;
+	char name[SP_MAX_NAME_LEN];
+
+	/* Bus specific device information */
+	void *dev_specific;
+
+	/* I/O area used for device communication. */
+	void __iomem *io_map;
+
+	/* DMA caching attribute support */
+	unsigned int axcache;
+
+	bool irq_registered;
+	bool use_tasklet;
+
+	unsigned int ccp_irq;
+	irq_handler_t ccp_irq_handler;
+	void *ccp_irq_data;
+
+	unsigned int psp_irq;
+	irq_handler_t psp_irq_handler;
+	void *psp_irq_data;
+
+	void *ccp_data;
+	void *psp_data;
+};
+
+int sp_pci_init(void);
+void sp_pci_exit(void);
+
+int sp_platform_init(void);
+void sp_platform_exit(void);
+
+struct sp_device *sp_alloc_struct(struct device *dev);
+
+int sp_init(struct sp_device *sp);
+void sp_destroy(struct sp_device *sp);
+struct sp_device *sp_get_master(void);
+
+int sp_suspend(struct sp_device *sp, pm_message_t state);
+int sp_resume(struct sp_device *sp);
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_ccp_irq(struct sp_device *sp, void *data);
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_psp_irq(struct sp_device *sp, void *data);
+
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+
+int ccp_dev_init(struct sp_device *sp);
+void ccp_dev_destroy(struct sp_device *sp);
+
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state);
+int ccp_dev_resume(struct sp_device *sp);
+
+#else	/* !CONFIG_CRYPTO_DEV_SP_CCP */
+
+static inline int ccp_dev_init(struct sp_device *sp)
+{
+	return 0;
+}
+static inline void ccp_dev_destroy(struct sp_device *sp) { }
+
+static inline int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+{
+	return 0;
+}
+static inline int ccp_dev_resume(struct sp_device *sp)
+{
+	return 0;
+}
+#endif	/* CONFIG_CRYPTO_DEV_SP_CCP */
+
+#endif

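The #else stubs above are what let sp-dev.c call ccp_dev_init()/ccp_dev_destroy() unconditionally even when the CCP sub-driver is configured out. If the psp_vdata pointer (still a bare void * here) ever grows a real sub-driver, it would presumably be wired the same way; a sketch with an invented Kconfig symbol:

#ifdef CONFIG_CRYPTO_DEV_SP_PSP		/* hypothetical option */

int psp_dev_init(struct sp_device *sp);
void psp_dev_destroy(struct sp_device *sp);

#else	/* !CONFIG_CRYPTO_DEV_SP_PSP */

static inline int psp_dev_init(struct sp_device *sp)
{
	return 0;
}
static inline void psp_dev_destroy(struct sp_device *sp) { }

#endif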
+ 276 - 0
drivers/crypto/ccp/sp-pci.c

@@ -0,0 +1,276 @@
+/*
+ * AMD Secure Processor device driver
+ *
+ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+#define MSIX_VECTORS			2
+
+struct sp_pci {
+	int msix_count;
+	struct msix_entry msix_entry[MSIX_VECTORS];
+};
+
+static int sp_get_msix_irqs(struct sp_device *sp)
+{
+	struct sp_pci *sp_pci = sp->dev_specific;
+	struct device *dev = sp->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int v, ret;
+
+	for (v = 0; v < ARRAY_SIZE(sp_pci->msix_entry); v++)
+		sp_pci->msix_entry[v].entry = v;
+
+	ret = pci_enable_msix_range(pdev, sp_pci->msix_entry, 1, v);
+	if (ret < 0)
+		return ret;
+
+	sp_pci->msix_count = ret;
+	sp->use_tasklet = true;
+
+	sp->psp_irq = sp_pci->msix_entry[0].vector;
+	sp->ccp_irq = (sp_pci->msix_count > 1) ? sp_pci->msix_entry[1].vector
+					       : sp_pci->msix_entry[0].vector;
+	return 0;
+}
+
+static int sp_get_msi_irq(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = pci_enable_msi(pdev);
+	if (ret)
+		return ret;
+
+	sp->ccp_irq = pdev->irq;
+	sp->psp_irq = pdev->irq;
+
+	return 0;
+}
+
+static int sp_get_irqs(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	int ret;
+
+	ret = sp_get_msix_irqs(sp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI-X vectors, try MSI */
+	dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+	ret = sp_get_msi_irq(sp);
+	if (!ret)
+		return 0;
+
+	/* Couldn't get MSI interrupt */
+	dev_notice(dev, "could not enable MSI (%d)\n", ret);
+
+	return ret;
+}
+
+static void sp_free_irqs(struct sp_device *sp)
+{
+	struct sp_pci *sp_pci = sp->dev_specific;
+	struct device *dev = sp->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (sp_pci->msix_count)
+		pci_disable_msix(pdev);
+	else if (sp->psp_irq)
+		pci_disable_msi(pdev);
+
+	sp->ccp_irq = 0;
+	sp->psp_irq = 0;
+}
+
+static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct sp_device *sp;
+	struct sp_pci *sp_pci;
+	struct device *dev = &pdev->dev;
+	void __iomem * const *iomap_table;
+	int bar_mask;
+	int ret;
+
+	ret = -ENOMEM;
+	sp = sp_alloc_struct(dev);
+	if (!sp)
+		goto e_err;
+
+	sp_pci = devm_kzalloc(dev, sizeof(*sp_pci), GFP_KERNEL);
+	if (!sp_pci)
+		goto e_err;
+
+	sp->dev_specific = sp_pci;
+	sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data;
+	if (!sp->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ccp");
+	if (ret) {
+		dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	iomap_table = pcim_iomap_table(pdev);
+	if (!iomap_table) {
+		dev_err(dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	sp->io_map = iomap_table[sp->dev_vdata->bar];
+	if (!sp->io_map) {
+		dev_err(dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	ret = sp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
+	pci_set_master(pdev);
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+				ret);
+			goto e_err;
+		}
+	}
+
+	dev_set_drvdata(dev, sp);
+
+	ret = sp_init(sp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+static void sp_pci_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	if (!sp)
+		return;
+
+	sp_destroy(sp);
+
+	sp_free_irqs(sp);
+
+	dev_notice(dev, "disabled\n");
+}
+
+#ifdef CONFIG_PM
+static int sp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_suspend(sp, state);
+}
+
+static int sp_pci_resume(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_resume(sp);
+}
+#endif
+
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3,
+#endif
+	},
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5a,
+#endif
+	},
+	{
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5b,
+#endif
+	},
+};
+static const struct pci_device_id sp_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
+	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] },
+	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, sp_pci_table);
+
+static struct pci_driver sp_pci_driver = {
+	.name = "ccp",
+	.id_table = sp_pci_table,
+	.probe = sp_pci_probe,
+	.remove = sp_pci_remove,
+#ifdef CONFIG_PM
+	.suspend = sp_pci_suspend,
+	.resume = sp_pci_resume,
+#endif
+};
+
+int sp_pci_init(void)
+{
+	return pci_register_driver(&sp_pci_driver);
+}
+
+void sp_pci_exit(void)
+{
+	pci_unregister_driver(&sp_pci_driver);
+}

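Supporting a new part in the driver above takes two table entries: an sp_dev_vdata describing its BAR and sub-device data, and a PCI ID row pointing at it. A sketch; the 0x1479 device ID and the demo_* names are invented:

static const struct sp_dev_vdata demo_vdata = {
	.bar = 2,
#ifdef CONFIG_CRYPTO_DEV_SP_CCP
	.ccp_vdata = &ccpv5b,
#endif
};

static const struct pci_device_id demo_ids[] = {
	{ PCI_VDEVICE(AMD, 0x1479), (kernel_ulong_t)&demo_vdata },
	/* Last entry must be zero */
	{ 0, }
};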
+ 256 - 0
drivers/crypto/ccp/sp-platform.c

@@ -0,0 +1,256 @@
+/*
+ * AMD Secure Processor device driver
+ *
+ * Copyright (C) 2014,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/acpi.h>
+
+#include "ccp-dev.h"
+
+struct sp_platform {
+	int coherent;
+	unsigned int irq_count;
+};
+
+static const struct acpi_device_id sp_acpi_match[];
+static const struct of_device_id sp_of_match[];
+
+static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev)
+{
+#ifdef CONFIG_OF
+	const struct of_device_id *match;
+
+	match = of_match_node(sp_of_match, pdev->dev.of_node);
+	if (match && match->data)
+		return (struct sp_dev_vdata *)match->data;
+#endif
+	return NULL;
+}
+
+static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev)
+{
+#ifdef CONFIG_ACPI
+	const struct acpi_device_id *match;
+
+	match = acpi_match_device(sp_acpi_match, &pdev->dev);
+	if (match && match->driver_data)
+		return (struct sp_dev_vdata *)match->driver_data;
+#endif
+	return NULL;
+}
+
+static int sp_get_irqs(struct sp_device *sp)
+{
+	struct sp_platform *sp_platform = sp->dev_specific;
+	struct device *dev = sp->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	unsigned int i, count;
+	int ret;
+
+	for (i = 0, count = 0; i < pdev->num_resources; i++) {
+		struct resource *res = &pdev->resource[i];
+
+		if (resource_type(res) == IORESOURCE_IRQ)
+			count++;
+	}
+
+	sp_platform->irq_count = count;
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_notice(dev, "unable to get IRQ (%d)\n", ret);
+		return ret;
+	}
+
+	sp->psp_irq = ret;
+	if (count == 1) {
+		sp->ccp_irq = ret;
+	} else {
+		ret = platform_get_irq(pdev, 1);
+		if (ret < 0) {
+			dev_notice(dev, "unable to get IRQ (%d)\n", ret);
+			return ret;
+		}
+
+		sp->ccp_irq = ret;
+	}
+
+	return 0;
+}
+
+static int sp_platform_probe(struct platform_device *pdev)
+{
+	struct sp_device *sp;
+	struct sp_platform *sp_platform;
+	struct device *dev = &pdev->dev;
+	enum dev_dma_attr attr;
+	struct resource *ior;
+	int ret;
+
+	ret = -ENOMEM;
+	sp = sp_alloc_struct(dev);
+	if (!sp)
+		goto e_err;
+
+	sp_platform = devm_kzalloc(dev, sizeof(*sp_platform), GFP_KERNEL);
+	if (!sp_platform)
+		goto e_err;
+
+	sp->dev_specific = sp_platform;
+	sp->dev_vdata = pdev->dev.of_node ? sp_get_of_version(pdev)
+					 : sp_get_acpi_version(pdev);
+	if (!sp->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sp->io_map = devm_ioremap_resource(dev, ior);
+	if (IS_ERR(sp->io_map)) {
+		ret = PTR_ERR(sp->io_map);
+		goto e_err;
+	}
+
+	attr = device_get_dma_attr(dev);
+	if (attr == DEV_DMA_NOT_SUPPORTED) {
+		ret = -ENODEV;
+		dev_err(dev, "DMA is not supported\n");
+		goto e_err;
+	}
+
+	sp_platform->coherent = (attr == DEV_DMA_COHERENT);
+	if (sp_platform->coherent)
+		sp->axcache = CACHE_WB_NO_ALLOC;
+	else
+		sp->axcache = CACHE_NONE;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	ret = sp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
+	dev_set_drvdata(dev, sp);
+
+	ret = sp_init(sp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+static int sp_platform_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	sp_destroy(sp);
+
+	dev_notice(dev, "disabled\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int sp_platform_suspend(struct platform_device *pdev,
+				pm_message_t state)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_suspend(sp, state);
+}
+
+static int sp_platform_resume(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	return sp_resume(sp);
+}
+#endif
+
+static const struct sp_dev_vdata dev_vdata[] = {
+	{
+		.bar = 0,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3_platform,
+#endif
+	},
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id sp_acpi_match[] = {
+	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id sp_of_match[] = {
+	{ .compatible = "amd,ccp-seattle-v1a",
+	  .data = (const void *)&dev_vdata[0] },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, sp_of_match);
+#endif
+
+static struct platform_driver sp_platform_driver = {
+	.driver = {
+		.name = "ccp",
+#ifdef CONFIG_ACPI
+		.acpi_match_table = sp_acpi_match,
+#endif
+#ifdef CONFIG_OF
+		.of_match_table = sp_of_match,
+#endif
+	},
+	.probe = sp_platform_probe,
+	.remove = sp_platform_remove,
+#ifdef CONFIG_PM
+	.suspend = sp_platform_suspend,
+	.resume = sp_platform_resume,
+#endif
+};
+
+int sp_platform_init(void)
+{
+	return platform_driver_register(&sp_platform_driver);
+}
+
+void sp_platform_exit(void)
+{
+	platform_driver_unregister(&sp_platform_driver);
+}

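The coherency branch in sp_platform_probe() above keys off device_get_dma_attr(), which reports DEV_DMA_COHERENT when the firmware marks the device coherent (the "dma-coherent" property on OF systems, _CCA under ACPI). A minimal sketch of that decision in isolation; demo_axcache() is an invented helper reusing the CACHE_* constants from sp-dev.h:

#include <linux/property.h>

#include "sp-dev.h"

static unsigned int demo_axcache(struct device *dev)
{
	return device_get_dma_attr(dev) == DEV_DMA_COHERENT ?
	       CACHE_WB_NO_ALLOC : CACHE_NONE;
}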
Some files were not shown because too many files changed in this diff