
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Algorithms:
   - add private key generation to ecdh

  Drivers:
   - add generic gcm(aes) to aesni-intel
   - add SafeXcel EIP197 crypto engine driver
   - add ecb(aes), cfb(aes) and ecb(des3_ede) to cavium
   - add support for CNN55XX adapters in cavium
   - add ctr mode to chcr
   - add support for gcm(aes) to omap"
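For orientation only: the headline x86 item above is the generic gcm(aes) front end in aesni-intel (see the aesni-intel_glue.c changes further down). The following is a hedged, purely illustrative sketch of how a kernel caller exercises such an AEAD through the crypto API; none of the names or key/IV values below come from this merge, and a synchronous implementation is requested so no completion handling is needed.

    #include <crypto/aead.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    static int demo_gcm_selftest(void)
    {
    	struct crypto_aead *tfm;
    	struct aead_request *req;
    	struct scatterlist sg;
    	u8 key[16] = { 0 };	/* AES-128 key (demo value only) */
    	u8 iv[12] = { 0 };	/* 96-bit GCM nonce (demo value only) */
    	u8 *buf;
    	int ret;

    	/* Ask for a synchronous gcm(aes) implementation. */
    	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	ret = crypto_aead_setkey(tfm, key, sizeof(key));
    	if (ret)
    		goto out_free_tfm;
    	ret = crypto_aead_setauthsize(tfm, 16);
    	if (ret)
    		goto out_free_tfm;

    	/* 16 bytes of plaintext encrypted in place, plus room for the tag. */
    	buf = kzalloc(32, GFP_KERNEL);
    	if (!buf) {
    		ret = -ENOMEM;
    		goto out_free_tfm;
    	}
    	sg_init_one(&sg, buf, 32);

    	req = aead_request_alloc(tfm, GFP_KERNEL);
    	if (!req) {
    		ret = -ENOMEM;
    		goto out_free_buf;
    	}
    	aead_request_set_crypt(req, &sg, &sg, 16, iv);
    	aead_request_set_ad(req, 0);	/* no associated data in this demo */

    	ret = crypto_aead_encrypt(req);

    	aead_request_free(req);
    out_free_buf:
    	kfree(buf);
    out_free_tfm:
    	crypto_free_aead(tfm);
    	return ret;
    }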

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (140 commits)
  crypto: testmgr - Reenable sha1/aes in FIPS mode
  crypto: ccp - Release locks before returning
  crypto: cavium/nitrox - dma_mapping_error() returns bool
  crypto: doc - fix typo in docs
  Documentation/bindings: Document the SafeXel cryptographic engine driver
  crypto: caam - fix gfp allocation flags (part II)
  crypto: caam - fix gfp allocation flags (part I)
  crypto: drbg - Fixes panic in wait_for_completion call
  crypto: caam - make of_device_ids const.
  crypto: vmx - remove unnecessary check
  crypto: n2 - make of_device_ids const
  crypto: inside-secure - use the base_end pointer in ring rollback
  crypto: inside-secure - increase the batch size
  crypto: inside-secure - only dequeue when needed
  crypto: inside-secure - get the backlog before dequeueing the request
  crypto: inside-secure - stop requeueing failed requests
  crypto: inside-secure - use one queue per hw ring
  crypto: inside-secure - update the context and request later
  crypto: inside-secure - align the cipher and hash send functions
  crypto: inside-secure - optimize DSE bufferability control
  ...
Linus Torvalds, 8 years ago
commit 8ad06e56dc
100 changed files with 12138 additions and 1064 deletions
  1. 28 10
      Documentation/crypto/api-samples.rst
  2. 1 1
      Documentation/crypto/userspace-if.rst
  3. 29 0
      Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt
  4. 3 5
      Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
  5. 3 1
      Documentation/devicetree/bindings/rng/mtk-rng.txt
  6. 7 0
      Documentation/devicetree/bindings/rng/timeriomem_rng.txt
  7. 18 0
      MAINTAINERS
  8. 2 4
      arch/arm/crypto/aes-ce-glue.c
  9. 6 0
      arch/arm/crypto/crc32-ce-glue.c
  10. 2 4
      arch/arm/crypto/ghash-ce-glue.c
  11. 2 3
      arch/arm/crypto/sha1-ce-glue.c
  12. 2 3
      arch/arm/crypto/sha2-ce-glue.c
  13. 4 2
      arch/arm64/crypto/sha1-ce-core.S
  14. 3 8
      arch/arm64/crypto/sha1-ce-glue.c
  15. 4 2
      arch/arm64/crypto/sha2-ce-core.S
  16. 5 8
      arch/arm64/crypto/sha2-ce-glue.c
  17. 22 25
      arch/x86/crypto/aes-x86_64-asm_64.S
  18. 180 51
      arch/x86/crypto/aesni-intel_asm.S
  19. 201 82
      arch/x86/crypto/aesni-intel_avx-x86_64.S
  20. 158 50
      arch/x86/crypto/aesni-intel_glue.c
  21. 0 3
      arch/x86/crypto/glue_helper.c
  22. 4 3
      arch/x86/crypto/sha512-mb/sha512_mb.c
  23. 1 0
      crypto/Kconfig
  24. 5 4
      crypto/Makefile
  25. 1 1
      crypto/aes_ti.c
  26. 1 1
      crypto/algapi.c
  27. 13 10
      crypto/crypto_engine.c
  28. 6 3
      crypto/dh.c
  29. 2 2
      crypto/dh_helper.c
  30. 1 0
      crypto/drbg.c
  31. 70 18
      crypto/ecc.c
  32. 25 16
      crypto/ecc.h
  33. 15 14
      crypto/ecdh.c
  34. 2 2
      crypto/ecdh_helper.c
  35. 3 2
      crypto/hmac.c
  36. 0 5
      crypto/rng.c
  37. 3 9
      crypto/rsa-pkcs1pad.c
  38. 2 2
      crypto/rsa.c
  39. 0 4
      crypto/tcrypt.c
  40. 104 24
      crypto/testmgr.c
  41. 143 0
      crypto/testmgr.h
  42. 42 0
      drivers/char/hw_random/mtk-rng.c
  43. 9 2
      drivers/char/hw_random/omap3-rom-rng.c
  44. 7 0
      drivers/char/hw_random/timeriomem-rng.c
  45. 30 0
      drivers/crypto/Kconfig
  46. 5 1
      drivers/crypto/Makefile
  47. 1 0
      drivers/crypto/amcc/crypto4xx_core.c
  48. 4 3
      drivers/crypto/bcm/cipher.c
  49. 4 6
      drivers/crypto/caam/caamalg.c
  50. 4 6
      drivers/crypto/caam/caamalg_qi.c
  51. 16 16
      drivers/crypto/caam/caamhash.c
  52. 446 26
      drivers/crypto/caam/caampkc.c
  53. 58 0
      drivers/crypto/caam/caampkc.h
  54. 1 1
      drivers/crypto/caam/jr.c
  55. 62 0
      drivers/crypto/caam/pdb.h
  56. 36 0
      drivers/crypto/caam/pkc_desc.c
  57. 157 77
      drivers/crypto/cavium/cpt/cptvf_algs.c
  58. 7 0
      drivers/crypto/cavium/cpt/cptvf_algs.h
  59. 1 1
      drivers/crypto/cavium/cpt/cptvf_main.c
  60. 21 0
      drivers/crypto/cavium/nitrox/Kconfig
  61. 8 0
      drivers/crypto/cavium/nitrox/Makefile
  62. 457 0
      drivers/crypto/cavium/nitrox/nitrox_algs.c
  63. 42 0
      drivers/crypto/cavium/nitrox/nitrox_common.h
  64. 1084 0
      drivers/crypto/cavium/nitrox/nitrox_csr.h
  65. 179 0
      drivers/crypto/cavium/nitrox/nitrox_dev.h
  66. 401 0
      drivers/crypto/cavium/nitrox/nitrox_hal.c
  67. 467 0
      drivers/crypto/cavium/nitrox/nitrox_isr.c
  68. 210 0
      drivers/crypto/cavium/nitrox/nitrox_lib.c
  69. 640 0
      drivers/crypto/cavium/nitrox/nitrox_main.c
  70. 445 0
      drivers/crypto/cavium/nitrox/nitrox_req.h
  71. 735 0
      drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
  72. 2 1
      drivers/crypto/ccp/Makefile
  73. 3 2
      drivers/crypto/ccp/ccp-crypto-sha.c
  74. 344 0
      drivers/crypto/ccp/ccp-debugfs.c
  75. 27 1
      drivers/crypto/ccp/ccp-dev-v5.c
  76. 2 1
      drivers/crypto/ccp/ccp-dev.c
  77. 20 0
      drivers/crypto/ccp/ccp-dev.h
  78. 2 2
      drivers/crypto/ccp/ccp-platform.c
  79. 874 222
      drivers/crypto/chelsio/chcr_algo.c
  80. 27 3
      drivers/crypto/chelsio/chcr_algo.h
  81. 39 17
      drivers/crypto/chelsio/chcr_core.c
  82. 4 1
      drivers/crypto/chelsio/chcr_core.h
  83. 22 3
      drivers/crypto/chelsio/chcr_crypto.h
  84. 10 2
      drivers/crypto/img-hash.c
  85. 2 0
      drivers/crypto/inside-secure/Makefile
  86. 926 0
      drivers/crypto/inside-secure/safexcel.c
  87. 574 0
      drivers/crypto/inside-secure/safexcel.h
  88. 561 0
      drivers/crypto/inside-secure/safexcel_cipher.c
  89. 1052 0
      drivers/crypto/inside-secure/safexcel_hash.c
  90. 157 0
      drivers/crypto/inside-secure/safexcel_ring.c
  91. 1 2
      drivers/crypto/ixp4xx_crypto.c
  92. 3 2
      drivers/crypto/marvell/hash.c
  93. 1 10
      drivers/crypto/mediatek/mtk-platform.c
  94. 0 2
      drivers/crypto/mediatek/mtk-platform.h
  95. 3 2
      drivers/crypto/mediatek/mtk-sha.c
  96. 3 2
      drivers/crypto/mv_cesa.c
  97. 2 2
      drivers/crypto/n2_core.c
  98. 408 0
      drivers/crypto/omap-aes-gcm.c
  99. 195 266
      drivers/crypto/omap-aes.c
  100. 214 0
      drivers/crypto/omap-aes.h

+ 28 - 10
Documentation/crypto/api-samples.rst

@@ -155,9 +155,9 @@ Code Example For Use of Operational State Memory With SHASH
         char ctx[];
     };
 
-    static struct sdesc init_sdesc(struct crypto_shash *alg)
+    static struct sdesc *init_sdesc(struct crypto_shash *alg)
     {
-        struct sdesc sdesc;
+        struct sdesc *sdesc;
         int size;
 
         size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
@@ -169,15 +169,16 @@ Code Example For Use of Operational State Memory With SHASH
         return sdesc;
     }
 
-    static int calc_hash(struct crypto_shashalg,
-                 const unsigned chardata, unsigned int datalen,
-                 unsigned chardigest) {
-        struct sdesc sdesc;
+    static int calc_hash(struct crypto_shash *alg,
+                 const unsigned char *data, unsigned int datalen,
+                 unsigned char *digest)
+    {
+        struct sdesc *sdesc;
         int ret;
 
         sdesc = init_sdesc(alg);
         if (IS_ERR(sdesc)) {
-            pr_info("trusted_key: can't alloc %s\n", hash_alg);
+            pr_info("can't alloc sdesc\n");
             return PTR_ERR(sdesc);
         }
 
@@ -186,6 +187,23 @@ Code Example For Use of Operational State Memory With SHASH
         return ret;
     }
 
+    static int test_hash(const unsigned char *data, unsigned int datalen,
+                 unsigned char *digest)
+    {
+        struct crypto_shash *alg;
+        char *hash_alg_name = "sha1-padlock-nano";
+        int ret;
+
+        alg = crypto_alloc_shash(hash_alg_name, CRYPTO_ALG_TYPE_SHASH, 0);
+        if (IS_ERR(alg)) {
+                pr_info("can't alloc alg %s\n", hash_alg_name);
+                return PTR_ERR(alg);
+        }
+        ret = calc_hash(alg, data, datalen, digest);
+        crypto_free_shash(alg);
+        return ret;
+    }
+
 
 Code Example For Random Number Generator Usage
 ----------------------------------------------
@@ -195,8 +213,8 @@ Code Example For Random Number Generator Usage
 
     static int get_random_numbers(u8 *buf, unsigned int len)
     {
-        struct crypto_rngrng = NULL;
-        chardrbg = "drbg_nopr_sha256"; /* Hash DRBG with SHA-256, no PR */
+        struct crypto_rng *rng = NULL;
+        char *drbg = "drbg_nopr_sha256"; /* Hash DRBG with SHA-256, no PR */
         int ret;
 
         if (!buf || !len) {
@@ -207,7 +225,7 @@ Code Example For Random Number Generator Usage
         rng = crypto_alloc_rng(drbg, 0, 0);
         if (IS_ERR(rng)) {
             pr_debug("could not allocate RNG handle for %s\n", drbg);
-            return -PTR_ERR(rng);
+            return PTR_ERR(rng);
         }
 
         ret = crypto_rng_get_bytes(rng, buf, len);
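As a purely illustrative aside (not part of the patch): assuming the corrected init_sdesc()/calc_hash()/test_hash() helpers shown in the diff above are built into a module, a caller might use them roughly as in the hedged sketch below. demo_calc_digest() is an invented name; the 20-byte buffer matches the SHA-1 digest size produced by the "sha1-padlock-nano" driver the example hard-codes.

    #include <crypto/hash.h>
    #include <linux/printk.h>

    /* Hedged usage sketch for the documentation helpers above. */
    static int demo_calc_digest(void)
    {
    	static const unsigned char data[] = "some data to hash";
    	unsigned char digest[20];	/* SHA-1 digest is 20 bytes */
    	int ret;

    	ret = test_hash(data, sizeof(data) - 1, digest);
    	if (ret)
    		return ret;

    	print_hex_dump(KERN_INFO, "sha1: ", DUMP_PREFIX_NONE, 16, 1,
    		       digest, sizeof(digest), false);
    	return 0;
    }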

+ 1 - 1
Documentation/crypto/userspace-if.rst

@@ -327,7 +327,7 @@ boundary. Non-aligned data can be used as well, but may require more
 operations of the kernel which would defeat the speed gains obtained
 from the zero-copy interface.
 
-The system-interent limit for the size of one zero-copy operation is 16
+The system-inherent limit for the size of one zero-copy operation is 16
 pages. If more data is to be sent to AF_ALG, user space must slice the
 input into segments with a maximum size of 16 pages.
 

+ 29 - 0
Documentation/devicetree/bindings/crypto/inside-secure-safexcel.txt

@@ -0,0 +1,29 @@
+Inside Secure SafeXcel cryptographic engine
+
+Required properties:
+- compatible: Should be "inside-secure,safexcel-eip197".
+- reg: Base physical address of the engine and length of memory mapped region.
+- interrupts: Interrupt numbers for the rings and engine.
+- interrupt-names: Should be "ring0", "ring1", "ring2", "ring3", "eip", "mem".
+
+Optional properties:
+- clocks: Reference to the crypto engine clock.
+- dma-mask: The address mask limitation. Defaults to 64.
+
+Example:
+
+	crypto: crypto@800000 {
+		compatible = "inside-secure,safexcel-eip197";
+		reg = <0x800000 0x200000>;
+		interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "mem", "ring0", "ring1", "ring2", "ring3",
+				  "eip";
+		clocks = <&cpm_syscon0 1 26>;
+		dma-mask = <0xff 0xffffffff>;
+		status = "disabled";
+	};

+ 3 - 5
Documentation/devicetree/bindings/crypto/mediatek-crypto.txt

@@ -6,8 +6,7 @@ Required properties:
 - interrupts: Should contain the five crypto engines interrupts in numeric
 	order. These are global system and four descriptor rings.
 - clocks: the clock used by the core
-- clock-names: the names of the clock listed in the clocks property. These are
-	"ethif", "cryp"
+- clock-names: Must contain "cryp".
 - power-domains: Must contain a reference to the PM domain.
 
 
@@ -20,8 +19,7 @@ Example:
 			     <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
 			     <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
 			     <GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
-		clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
-			 <&ethsys CLK_ETHSYS_CRYPTO>;
-		clock-names = "ethif","cryp";
+		clocks = <&ethsys CLK_ETHSYS_CRYPTO>;
+		clock-names = "cryp";
 		power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
 	};

+ 3 - 1
Documentation/devicetree/bindings/rng/mtk-rng.txt

@@ -2,7 +2,9 @@ Device-Tree bindings for Mediatek random number generator
 found in Mediatek SoC family
 
 Required properties:
-- compatible	    : Should be "mediatek,mt7623-rng"
+- compatible	    : Should be
+			"mediatek,mt7622-rng", 	"mediatek,mt7623-rng" : for MT7622
+			"mediatek,mt7623-rng" : for MT7623
 - clocks	    : list of clock specifiers, corresponding to
 		      entries in clock-names property;
 - clock-names	    : Should contain "rng" entries;

+ 7 - 0
Documentation/devicetree/bindings/rng/timeriomem_rng.txt

@@ -5,6 +5,13 @@ Required properties:
 - reg : base address to sample from
 - period : wait time in microseconds to use between samples
 
+Optional properties:
+- quality : estimated number of bits of true entropy per 1024 bits read from the
+            rng.  Defaults to zero which causes the kernel's default quality to
+            be used instead.  Note that the default quality is usually zero
+            which disables using this rng to automatically fill the kernel's
+            entropy pool.
+
 N.B. currently 'reg' must be four bytes wide and aligned
 
 Example:

+ 18 - 0
MAINTAINERS

@@ -3746,6 +3746,13 @@ S:	Supported
 F:	drivers/infiniband/hw/cxgb4/
 F:	include/uapi/rdma/cxgb4-abi.h
 
+CXGB4 CRYPTO DRIVER (chcr)
+M:	Harsh Jain <harsh@chelsio.com>
+L:	linux-crypto@vger.kernel.org
+W:	http://www.chelsio.com
+S:	Supported
+F:	drivers/crypto/chelsio
+
 CXGB4VF ETHERNET DRIVER (CXGB4VF)
 M:	Casey Leedom <leedom@chelsio.com>
 L:	netdev@vger.kernel.org
@@ -6647,6 +6654,12 @@ F:	Documentation/input/multi-touch-protocol.rst
 F:	drivers/input/input-mt.c
 K:	\b(ABS|SYN)_MT_
 
+INSIDE SECURE CRYPTO DRIVER
+M:	Antoine Tenart <antoine.tenart@free-electrons.com>
+F:	drivers/crypto/inside-secure/
+S:	Maintained
+L:	linux-crypto@vger.kernel.org
+
 INTEL ASoC BDW/HSW DRIVERS
 M:	Jie Yang <yang.jie@linux.intel.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -8306,6 +8319,11 @@ L:	linux-wireless@vger.kernel.org
 S:	Maintained
 F:	drivers/net/wireless/mediatek/mt7601u/
 
+MEDIATEK RANDOM NUMBER GENERATOR SUPPORT
+M:      Sean Wang <sean.wang@mediatek.com>
+S:      Maintained
+F:      drivers/char/hw_random/mtk-rng.c
+
 MEGACHIPS STDPXXXX-GE-B850V3-FW LVDS/DP++ BRIDGES
 M:	Peter Senna Tschudin <peter.senna@collabora.com>
 M:	Martin Donnelly <martin.donnelly@ge.com>

+ 2 - 4
arch/arm/crypto/aes-ce-glue.c

@@ -14,6 +14,7 @@
 #include <crypto/aes.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <linux/cpufeature.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -425,9 +426,6 @@ static int __init aes_init(void)
 	int err;
 	int i;
 
-	if (!(elf_hwcap2 & HWCAP2_AES))
-		return -ENODEV;
-
 	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 	if (err)
 		return err;
@@ -451,5 +449,5 @@ unregister_simds:
 	return err;
 }
 
-module_init(aes_init);
+module_cpu_feature_match(AES, aes_init);
 module_exit(aes_exit);
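The pattern above (and in the other ARM glue changes below) replaces open-coded elf_hwcap2 checks with module_cpu_feature_match(), which both gates the init routine on the CPU feature being present and emits the cpu-feature device table used for module autoloading. A hedged, self-contained sketch of the idiom follows; the "demo" names are invented and not part of this merge.

    #include <linux/cpufeature.h>
    #include <linux/kernel.h>
    #include <linux/module.h>

    static int __init demo_ce_mod_init(void)
    {
    	/* Only reached on CPUs advertising the AES crypto extensions. */
    	pr_info("ARMv8 AES instructions available\n");
    	return 0;
    }

    static void __exit demo_ce_mod_exit(void)
    {
    }

    /* Ties autoloading and init to the AES CPU feature. */
    module_cpu_feature_match(AES, demo_ce_mod_init);
    module_exit(demo_ce_mod_exit);
    MODULE_LICENSE("GPL v2");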

+ 6 - 0
arch/arm/crypto/crc32-ce-glue.c

@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpufeature.h>
 #include <linux/crc32.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -233,6 +234,11 @@ static void __exit crc32_pmull_mod_exit(void)
 				  ARRAY_SIZE(crc32_pmull_algs));
 }
 
+static const struct cpu_feature crc32_cpu_feature[] = {
+	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
+
 module_init(crc32_pmull_mod_init);
 module_exit(crc32_pmull_mod_exit);
 

+ 2 - 4
arch/arm/crypto/ghash-ce-glue.c

@@ -15,6 +15,7 @@
 #include <crypto/cryptd.h>
 #include <crypto/internal/hash.h>
 #include <crypto/gf128mul.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
@@ -311,9 +312,6 @@ static int __init ghash_ce_mod_init(void)
 {
 	int err;
 
-	if (!(elf_hwcap2 & HWCAP2_PMULL))
-		return -ENODEV;
-
 	err = crypto_register_shash(&ghash_alg);
 	if (err)
 		return err;
@@ -334,5 +332,5 @@ static void __exit ghash_ce_mod_exit(void)
 	crypto_unregister_shash(&ghash_alg);
 }
 
-module_init(ghash_ce_mod_init);
+module_cpu_feature_match(PMULL, ghash_ce_mod_init);
 module_exit(ghash_ce_mod_exit);

+ 2 - 3
arch/arm/crypto/sha1-ce-glue.c

@@ -11,6 +11,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/sha1_base.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
@@ -82,8 +83,6 @@ static struct shash_alg alg = {
 
 static int __init sha1_ce_mod_init(void)
 {
-	if (!(elf_hwcap2 & HWCAP2_SHA1))
-		return -ENODEV;
 	return crypto_register_shash(&alg);
 }
 
@@ -92,5 +91,5 @@ static void __exit sha1_ce_mod_fini(void)
 	crypto_unregister_shash(&alg);
 }
 
-module_init(sha1_ce_mod_init);
+module_cpu_feature_match(SHA1, sha1_ce_mod_init);
 module_exit(sha1_ce_mod_fini);

+ 2 - 3
arch/arm/crypto/sha2-ce-glue.c

@@ -11,6 +11,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/sha256_base.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
@@ -100,8 +101,6 @@ static struct shash_alg algs[] = { {
 
 static int __init sha2_ce_mod_init(void)
 {
-	if (!(elf_hwcap2 & HWCAP2_SHA2))
-		return -ENODEV;
 	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
 }
 
@@ -110,5 +109,5 @@ static void __exit sha2_ce_mod_fini(void)
 	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
 }
 
-module_init(sha2_ce_mod_init);
+module_cpu_feature_match(SHA2, sha2_ce_mod_init);
 module_exit(sha2_ce_mod_fini);

+ 4 - 2
arch/arm64/crypto/sha1-ce-core.S

@@ -82,7 +82,8 @@ ENTRY(sha1_ce_transform)
 	ldr		dgb, [x0, #16]
 
 	/* load sha1_ce_state::finalize */
-	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
+	ldr_l		w4, sha1_ce_offsetof_finalize, x4
+	ldr		w4, [x0, x4]
 
 	/* load input */
 0:	ld1		{v8.4s-v11.4s}, [x1], #64
@@ -132,7 +133,8 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
-	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
+	ldr_l		w4, sha1_ce_offsetof_count, x4
+	ldr		x4, [x0, x4]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0

+ 3 - 8
arch/arm64/crypto/sha1-ce-glue.c

@@ -17,9 +17,6 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-#define ASM_EXPORT(sym, val) \
-	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -32,6 +29,9 @@ struct sha1_ce_state {
 asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 				  int blocks);
 
+const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
+const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
+
 static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
@@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
-	ASM_EXPORT(sha1_ce_offsetof_count,
-		   offsetof(struct sha1_ce_state, sst.count));
-	ASM_EXPORT(sha1_ce_offsetof_finalize,
-		   offsetof(struct sha1_ce_state, finalize));
-
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.

+ 4 - 2
arch/arm64/crypto/sha2-ce-core.S

@@ -88,7 +88,8 @@ ENTRY(sha2_ce_transform)
 	ld1		{dgav.4s, dgbv.4s}, [x0]
 
 	/* load sha256_ce_state::finalize */
-	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
+	ldr_l		w4, sha256_ce_offsetof_finalize, x4
+	ldr		w4, [x0, x4]
 
 	/* load input */
 0:	ld1		{v16.4s-v19.4s}, [x1], #64
@@ -136,7 +137,8 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz		x4, 3f
-	ldr		x4, [x0, #:lo12:sha256_ce_offsetof_count]
+	ldr_l		w4, sha256_ce_offsetof_count, x4
+	ldr		x4, [x0, x4]
 	movi		v17.2d, #0
 	mov		x8, #0x80000000
 	movi		v18.2d, #0

+ 5 - 8
arch/arm64/crypto/sha2-ce-glue.c

@@ -17,9 +17,6 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-#define ASM_EXPORT(sym, val) \
-	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -32,6 +29,11 @@ struct sha256_ce_state {
 asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 				  int blocks);
 
+const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
+					      sst.count);
+const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
+						 finalize);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
@@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
 
-	ASM_EXPORT(sha256_ce_offsetof_count,
-		   offsetof(struct sha256_ce_state, sst.count));
-	ASM_EXPORT(sha256_ce_offsetof_finalize,
-		   offsetof(struct sha256_ce_state, finalize));
-
 	/*
	 * Allow the asm code to perform the finalization if there is no
	 * partial data and the input is a round multiple of the block size.

+ 22 - 25
arch/x86/crypto/aes-x86_64-asm_64.S

@@ -42,17 +42,15 @@
 #define R5E	%esi
 #define R5E	%esi
 #define R6	%rdi
 #define R6	%rdi
 #define R6E	%edi
 #define R6E	%edi
-#define R7	%rbp
-#define R7E	%ebp
+#define R7	%r9	/* don't use %rbp; it breaks stack traces */
+#define R7E	%r9d
 #define R8	%r8
 #define R8	%r8
-#define R9	%r9
 #define R10	%r10
 #define R10	%r10
 #define R11	%r11
 #define R11	%r11
 
 
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
 	ENTRY(FUNC);			\
 	ENTRY(FUNC);			\
 	movq	r1,r2;			\
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	leaq	KEY+48(r8),r9;		\
 	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
 	movl	(r7),r5 ## E;		\
@@ -70,9 +68,8 @@
 	je	B192;			\
 	je	B192;			\
 	leaq	32(r9),r9;
 	leaq	32(r9),r9;
 
 
-#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
 	movq	r1,r2;			\
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	movl	r5 ## E,(r9);		\
 	movl	r5 ## E,(r9);		\
 	movl	r6 ## E,4(r9);		\
 	movl	r6 ## E,4(r9);		\
 	movl	r7 ## E,8(r9);		\
 	movl	r7 ## E,8(r9);		\
@@ -88,12 +85,12 @@
 	movl	TAB(,r6,4),r6 ## E;	\
 	movl	TAB(,r6,4),r6 ## E;	\
 	roll	$16,r2 ## E;		\
 	roll	$16,r2 ## E;		\
 	shrl	$16,r4 ## E;		\
 	shrl	$16,r4 ## E;		\
-	movzbl	r4 ## H,r7 ## E;	\
-	movzbl	r4 ## L,r4 ## E;	\
+	movzbl	r4 ## L,r7 ## E;	\
+	movzbl	r4 ## H,r4 ## E;	\
 	xorl	OFFSET(r8),ra ## E;	\
 	xorl	OFFSET(r8),ra ## E;	\
 	xorl	OFFSET+4(r8),rb ## E;	\
 	xorl	OFFSET+4(r8),rb ## E;	\
-	xorl	TAB+3072(,r7,4),r5 ## E;\
-	xorl	TAB+2048(,r4,4),r6 ## E;\
+	xorl	TAB+3072(,r4,4),r5 ## E;\
+	xorl	TAB+2048(,r7,4),r6 ## E;\
 	movzbl	r1 ## L,r7 ## E;	\
 	movzbl	r1 ## L,r7 ## E;	\
 	movzbl	r1 ## H,r4 ## E;	\
 	movzbl	r1 ## H,r4 ## E;	\
 	movl	TAB+1024(,r4,4),r4 ## E;\
 	movl	TAB+1024(,r4,4),r4 ## E;\
@@ -101,19 +98,19 @@
 	roll	$16,r1 ## E;		\
 	roll	$16,r1 ## E;		\
 	shrl	$16,r3 ## E;		\
 	shrl	$16,r3 ## E;		\
 	xorl	TAB(,r7,4),r5 ## E;	\
 	xorl	TAB(,r7,4),r5 ## E;	\
-	movzbl	r3 ## H,r7 ## E;	\
-	movzbl	r3 ## L,r3 ## E;	\
-	xorl	TAB+3072(,r7,4),r4 ## E;\
-	xorl	TAB+2048(,r3,4),r5 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r3 ## E;	\
+	movzbl	r3 ## L,r7 ## E;	\
+	movzbl	r3 ## H,r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r4 ## E;\
+	xorl	TAB+2048(,r7,4),r5 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r3 ## E;	\
 	shrl	$16,r1 ## E;		\
 	shrl	$16,r1 ## E;		\
-	xorl	TAB+3072(,r7,4),r6 ## E;\
-	movl	TAB+2048(,r3,4),r3 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r1 ## E;	\
-	xorl	TAB+1024(,r7,4),r6 ## E;\
-	xorl	TAB(,r1,4),r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r6 ## E;\
+	movl	TAB+2048(,r7,4),r3 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r1 ## E;	\
+	xorl	TAB+1024(,r1,4),r6 ## E;\
+	xorl	TAB(,r7,4),r3 ## E;	\
 	movzbl	r2 ## H,r1 ## E;	\
 	movzbl	r2 ## H,r1 ## E;	\
 	movzbl	r2 ## L,r7 ## E;	\
 	movzbl	r2 ## L,r7 ## E;	\
 	shrl	$16,r2 ## E;		\
 	shrl	$16,r2 ## E;		\
@@ -131,9 +128,9 @@
 	movl	r4 ## E,r2 ## E;
 	movl	r4 ## E,r2 ## E;
 
 
 #define entry(FUNC,KEY,B128,B192) \
 #define entry(FUNC,KEY,B128,B192) \
-	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+	prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
 
 
-#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
 
 
 #define encrypt_round(TAB,OFFSET) \
 #define encrypt_round(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \

+ 180 - 51
arch/x86/crypto/aesni-intel_asm.S

@@ -89,6 +89,29 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
 ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
             .octa 0x00000000000000000000000000000000
             .octa 0x00000000000000000000000000000000
 
 
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+        .octa     0xffffffffffffffffffffffffffffffff
+        .octa     0xffffffffffffffffffffffffffffff0C
+        .octa     0xffffffffffffffffffffffffffff0D0C
+        .octa     0xffffffffffffffffffffffffff0E0D0C
+        .octa     0xffffffffffffffffffffffff0F0E0D0C
+        .octa     0xffffffffffffffffffffff0C0B0A0908
+        .octa     0xffffffffffffffffffff0D0C0B0A0908
+        .octa     0xffffffffffffffffff0E0D0C0B0A0908
+        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
+        .octa     0xffffffffffffff0C0B0A090807060504
+        .octa     0xffffffffffff0D0C0B0A090807060504
+        .octa     0xffffffffff0E0D0C0B0A090807060504
+        .octa     0xffffffff0F0E0D0C0B0A090807060504
+        .octa     0xffffff0C0B0A09080706050403020100
+        .octa     0xffff0D0C0B0A09080706050403020100
+        .octa     0xff0E0D0C0B0A09080706050403020100
+        .octa     0x0F0E0D0C0B0A09080706050403020100
+
 
 
 .text
 .text
 
 
@@ -252,32 +275,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
 	mov	   arg8, %r12           # %r12 = aadLen
 	mov	   arg8, %r12           # %r12 = aadLen
 	mov	   %r12, %r11
 	mov	   %r12, %r11
 	pxor	   %xmm\i, %xmm\i
 	pxor	   %xmm\i, %xmm\i
+	pxor       \XMM2, \XMM2
 
 
-_get_AAD_loop\num_initial_blocks\operation:
-	movd	   (%r10), \TMP1
-	pslldq	   $12, \TMP1
-	psrldq	   $4, %xmm\i
+	cmp	   $16, %r11
+	jl	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+	movdqu	   (%r10), %xmm\i
+	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   %xmm\i, \XMM2
+	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	add	   $16, %r10
+	sub	   $16, %r12
+	sub	   $16, %r11
+	cmp	   $16, %r11
+	jge	   _get_AAD_blocks\num_initial_blocks\operation
+
+	movdqu	   \XMM2, %xmm\i
+	cmp	   $0, %r11
+	je	   _get_AAD_done\num_initial_blocks\operation
+
+	pxor	   %xmm\i,%xmm\i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+	cmp	   $4, %r11
+	jle	   _get_AAD_rest4\num_initial_blocks\operation
+	movq	   (%r10), \TMP1
+	add	   $8, %r10
+	sub	   $8, %r11
+	pslldq	   $8, \TMP1
+	psrldq	   $8, %xmm\i
 	pxor	   \TMP1, %xmm\i
 	pxor	   \TMP1, %xmm\i
+	jmp	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+	cmp	   $0, %r11
+	jle	   _get_AAD_rest0\num_initial_blocks\operation
+	mov	   (%r10), %eax
+	movq	   %rax, \TMP1
 	add	   $4, %r10
 	add	   $4, %r10
-	sub	   $4, %r12
-	jne	   _get_AAD_loop\num_initial_blocks\operation
-
-	cmp	   $16, %r11
-	je	   _get_AAD_loop2_done\num_initial_blocks\operation
-
-	mov	   $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
+	sub	   $4, %r10
+	pslldq	   $12, \TMP1
 	psrldq	   $4, %xmm\i
 	psrldq	   $4, %xmm\i
-	sub	   $4, %r12
-	cmp	   %r11, %r12
-	jne	   _get_AAD_loop2\num_initial_blocks\operation
-
-_get_AAD_loop2_done\num_initial_blocks\operation:
+	pxor	   \TMP1, %xmm\i
+_get_AAD_rest0\num_initial_blocks\operation:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq	   %r12, %r11
+	salq	   $4, %r11
+	movdqu	   aad_shift_arr(%r11), \TMP1
+	PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   \XMM2, %xmm\i
+	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 
 
+_get_AAD_done\num_initial_blocks\operation:
 	xor	   %r11, %r11 # initialise the data pointer offset as zero
 	xor	   %r11, %r11 # initialise the data pointer offset as zero
-
-        # start AES for num_initial_blocks blocks
+	# start AES for num_initial_blocks blocks
 
 
 	mov	   %arg5, %rax                      # %rax = *Y0
 	mov	   %arg5, %rax                      # %rax = *Y0
 	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
 	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
@@ -322,7 +379,7 @@ aes_loop_initial_dec\num_initial_blocks:
                 # prepare plaintext/ciphertext for GHASH computation
                 # prepare plaintext/ciphertext for GHASH computation
 .endr
 .endr
 .endif
 .endif
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
         # apply GHASH on num_initial_blocks blocks
         # apply GHASH on num_initial_blocks blocks
 
 
 .if \i == 5
 .if \i == 5
@@ -477,28 +534,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
 	mov	   arg8, %r12           # %r12 = aadLen
 	mov	   arg8, %r12           # %r12 = aadLen
 	mov	   %r12, %r11
 	mov	   %r12, %r11
 	pxor	   %xmm\i, %xmm\i
 	pxor	   %xmm\i, %xmm\i
-_get_AAD_loop\num_initial_blocks\operation:
-	movd	   (%r10), \TMP1
-	pslldq	   $12, \TMP1
-	psrldq	   $4, %xmm\i
+	pxor	   \XMM2, \XMM2
+
+	cmp	   $16, %r11
+	jl	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+	movdqu	   (%r10), %xmm\i
+	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   %xmm\i, \XMM2
+	GHASH_MUL  \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	add	   $16, %r10
+	sub	   $16, %r12
+	sub	   $16, %r11
+	cmp	   $16, %r11
+	jge	   _get_AAD_blocks\num_initial_blocks\operation
+
+	movdqu	   \XMM2, %xmm\i
+	cmp	   $0, %r11
+	je	   _get_AAD_done\num_initial_blocks\operation
+
+	pxor	   %xmm\i,%xmm\i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some PT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+	cmp	   $4, %r11
+	jle	   _get_AAD_rest4\num_initial_blocks\operation
+	movq	   (%r10), \TMP1
+	add	   $8, %r10
+	sub	   $8, %r11
+	pslldq	   $8, \TMP1
+	psrldq	   $8, %xmm\i
 	pxor	   \TMP1, %xmm\i
 	pxor	   \TMP1, %xmm\i
+	jmp	   _get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+	cmp	   $0, %r11
+	jle	   _get_AAD_rest0\num_initial_blocks\operation
+	mov	   (%r10), %eax
+	movq	   %rax, \TMP1
 	add	   $4, %r10
 	add	   $4, %r10
-	sub	   $4, %r12
-	jne	   _get_AAD_loop\num_initial_blocks\operation
-	cmp	   $16, %r11
-	je	   _get_AAD_loop2_done\num_initial_blocks\operation
-	mov	   $16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
+	sub	   $4, %r10
+	pslldq	   $12, \TMP1
 	psrldq	   $4, %xmm\i
 	psrldq	   $4, %xmm\i
-	sub	   $4, %r12
-	cmp	   %r11, %r12
-	jne	   _get_AAD_loop2\num_initial_blocks\operation
-_get_AAD_loop2_done\num_initial_blocks\operation:
+	pxor	   \TMP1, %xmm\i
+_get_AAD_rest0\num_initial_blocks\operation:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq	   %r12, %r11
+	salq	   $4, %r11
+	movdqu	   aad_shift_arr(%r11), \TMP1
+	PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
 	PSHUFB_XMM   %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	   \XMM2, %xmm\i
+	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
 
 
+_get_AAD_done\num_initial_blocks\operation:
 	xor	   %r11, %r11 # initialise the data pointer offset as zero
 	xor	   %r11, %r11 # initialise the data pointer offset as zero
-
-        # start AES for num_initial_blocks blocks
+	# start AES for num_initial_blocks blocks
 
 
 	mov	   %arg5, %rax                      # %rax = *Y0
 	mov	   %arg5, %rax                      # %rax = *Y0
 	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
 	movdqu	   (%rax), \XMM0                    # XMM0 = Y0
@@ -543,7 +638,7 @@ aes_loop_initial_enc\num_initial_blocks:
 		# prepare plaintext/ciphertext for GHASH computation
 		# prepare plaintext/ciphertext for GHASH computation
 .endr
 .endr
 .endif
 .endif
-	GHASH_MUL  %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
         # apply GHASH on num_initial_blocks blocks
         # apply GHASH on num_initial_blocks blocks
 
 
 .if \i == 5
 .if \i == 5
@@ -1454,18 +1549,35 @@ _return_T_decrypt:
 	mov	arg10, %r11               # %r11 = auth_tag_len
 	mov	arg10, %r11               # %r11 = auth_tag_len
 	cmp	$16, %r11
 	cmp	$16, %r11
 	je	_T_16_decrypt
 	je	_T_16_decrypt
-	cmp	$12, %r11
-	je	_T_12_decrypt
+	cmp	$8, %r11
+	jl	_T_4_decrypt
 _T_8_decrypt:
 _T_8_decrypt:
 	MOVQ_R64_XMM	%xmm0, %rax
 	MOVQ_R64_XMM	%xmm0, %rax
 	mov	%rax, (%r10)
 	mov	%rax, (%r10)
-	jmp	_return_T_done_decrypt
-_T_12_decrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
 	psrldq	$8, %xmm0
 	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_decrypt
+_T_4_decrypt:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_decrypt
+_T_123_decrypt:
 	movd	%xmm0, %eax
 	movd	%xmm0, %eax
-	mov	%eax, 8(%r10)
+	cmp	$2, %r11
+	jl	_T_1_decrypt
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_decrypt
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_decrypt:
+	mov	%al, (%r10)
 	jmp	_return_T_done_decrypt
 	jmp	_return_T_done_decrypt
 _T_16_decrypt:
 _T_16_decrypt:
 	movdqu	%xmm0, (%r10)
 	movdqu	%xmm0, (%r10)
@@ -1718,18 +1830,35 @@ _return_T_encrypt:
 	mov	arg10, %r11                    # %r11 = auth_tag_len
 	mov	arg10, %r11                    # %r11 = auth_tag_len
 	cmp	$16, %r11
 	cmp	$16, %r11
 	je	_T_16_encrypt
 	je	_T_16_encrypt
-	cmp	$12, %r11
-	je	_T_12_encrypt
+	cmp	$8, %r11
+	jl	_T_4_encrypt
 _T_8_encrypt:
 _T_8_encrypt:
 	MOVQ_R64_XMM	%xmm0, %rax
 	MOVQ_R64_XMM	%xmm0, %rax
 	mov	%rax, (%r10)
 	mov	%rax, (%r10)
-	jmp	_return_T_done_encrypt
-_T_12_encrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
 	psrldq	$8, %xmm0
 	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_encrypt
+_T_4_encrypt:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_encrypt
+_T_123_encrypt:
 	movd	%xmm0, %eax
 	movd	%xmm0, %eax
-	mov	%eax, 8(%r10)
+	cmp	$2, %r11
+	jl	_T_1_encrypt
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_encrypt
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_encrypt:
+	mov	%al, (%r10)
 	jmp	_return_T_done_encrypt
 	jmp	_return_T_done_encrypt
 _T_16_encrypt:
 _T_16_encrypt:
 	movdqu	%xmm0, (%r10)
 	movdqu	%xmm0, (%r10)

+ 201 - 82
arch/x86/crypto/aesni-intel_avx-x86_64.S

@@ -155,6 +155,30 @@ SHIFT_MASK:      .octa     0x0f0e0d0c0b0a09080706050403020100
 ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
 ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
                  .octa     0x00000000000000000000000000000000
                  .octa     0x00000000000000000000000000000000
 
 
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+        .octa     0xffffffffffffffffffffffffffffffff
+        .octa     0xffffffffffffffffffffffffffffff0C
+        .octa     0xffffffffffffffffffffffffffff0D0C
+        .octa     0xffffffffffffffffffffffffff0E0D0C
+        .octa     0xffffffffffffffffffffffff0F0E0D0C
+        .octa     0xffffffffffffffffffffff0C0B0A0908
+        .octa     0xffffffffffffffffffff0D0C0B0A0908
+        .octa     0xffffffffffffffffff0E0D0C0B0A0908
+        .octa     0xffffffffffffffff0F0E0D0C0B0A0908
+        .octa     0xffffffffffffff0C0B0A090807060504
+        .octa     0xffffffffffff0D0C0B0A090807060504
+        .octa     0xffffffffff0E0D0C0B0A090807060504
+        .octa     0xffffffff0F0E0D0C0B0A090807060504
+        .octa     0xffffff0C0B0A09080706050403020100
+        .octa     0xffff0D0C0B0A09080706050403020100
+        .octa     0xff0E0D0C0B0A09080706050403020100
+        .octa     0x0F0E0D0C0B0A09080706050403020100
+
+
 .text
 .text
 
 
 
 
@@ -372,41 +396,72 @@ VARIABLE_OFFSET = 16*8
 
 
 .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
 .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
 	i = (8-\num_initial_blocks)
 	i = (8-\num_initial_blocks)
+	j = 0
 	setreg
 	setreg
 
 
-        mov     arg6, %r10                      # r10 = AAD
-        mov     arg7, %r12                      # r12 = aadLen
-
-
-        mov     %r12, %r11
-
-        vpxor   reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-        vmovd   (%r10), \T1
-        vpslldq $12, \T1, \T1
-        vpsrldq $4, reg_i, reg_i
-        vpxor   \T1, reg_i, reg_i
-
-        add     $4, %r10
-        sub     $4, %r12
-        jg      _get_AAD_loop\@
-
-
-        cmp     $16, %r11
-        je      _get_AAD_loop2_done\@
-        mov     $16, %r12
-
-_get_AAD_loop2\@:
-        vpsrldq $4, reg_i, reg_i
-        sub     $4, %r12
-        cmp     %r11, %r12
-        jg      _get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
-        #byte-reflect the AAD data
-        vpshufb SHUF_MASK(%rip), reg_i, reg_i
-
+	mov     arg6, %r10                      # r10 = AAD
+	mov     arg7, %r12                      # r12 = aadLen
+
+
+	mov     %r12, %r11
+
+	vpxor   reg_j, reg_j, reg_j
+	vpxor   reg_i, reg_i, reg_i
+	cmp     $16, %r11
+	jl      _get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu (%r10), reg_i
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_i, reg_j, reg_j
+	GHASH_MUL_AVX       reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add     $16, %r10
+	sub     $16, %r12
+	sub     $16, %r11
+	cmp     $16, %r11
+	jge     _get_AAD_blocks\@
+	vmovdqu reg_j, reg_i
+	cmp     $0, %r11
+	je      _get_AAD_done\@
+
+	vpxor   reg_i, reg_i, reg_i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp     $4, %r11
+	jle     _get_AAD_rest4\@
+	movq    (%r10), \T1
+	add     $8, %r10
+	sub     $8, %r11
+	vpslldq $8, \T1, \T1
+	vpsrldq $8, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+	jmp     _get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp     $0, %r11
+	jle      _get_AAD_rest0\@
+	mov     (%r10), %eax
+	movq    %rax, \T1
+	add     $4, %r10
+	sub     $4, %r11
+	vpslldq $12, \T1, \T1
+	vpsrldq $4, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq    %r12, %r11
+	salq    $4, %r11
+	movdqu  aad_shift_arr(%r11), \T1
+	vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_j, reg_i, reg_i
+	GHASH_MUL_AVX       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
 	# initialize the data pointer offset as zero
 	# initialize the data pointer offset as zero
 	xor     %r11, %r11
 	xor     %r11, %r11
 
 
@@ -480,7 +535,6 @@ _get_AAD_loop2_done\@:
 	i = (8-\num_initial_blocks)
 	i = (8-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	setreg
 	setreg
-        GHASH_MUL_AVX       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
 
 .rep \num_initial_blocks
 .rep \num_initial_blocks
         vpxor    reg_i, reg_j, reg_j
         vpxor    reg_i, reg_j, reg_j
@@ -1427,19 +1481,36 @@ _return_T\@:
         cmp     $16, %r11
         cmp     $16, %r11
         je      _T_16\@
         je      _T_16\@
 
 
-        cmp     $12, %r11
-        je      _T_12\@
+        cmp     $8, %r11
+        jl      _T_4\@
 
 
 _T_8\@:
 _T_8\@:
         vmovq   %xmm9, %rax
         vmovq   %xmm9, %rax
         mov     %rax, (%r10)
         mov     %rax, (%r10)
-        jmp     _return_T_done\@
-_T_12\@:
-        vmovq   %xmm9, %rax
-        mov     %rax, (%r10)
+        add     $8, %r10
+        sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
         vpsrldq $8, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_4\@:
         vmovd   %xmm9, %eax
         vmovd   %xmm9, %eax
-        mov     %eax, 8(%r10)
+        mov     %eax, (%r10)
+        add     $4, %r10
+        sub     $4, %r11
+        vpsrldq     $4, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_123\@:
+        vmovd     %xmm9, %eax
+        cmp     $2, %r11
+        jl     _T_1\@
+        mov     %ax, (%r10)
+        cmp     $2, %r11
+        je     _return_T_done\@
+        add     $2, %r10
+        sar     $16, %eax
+_T_1\@:
+        mov     %al, (%r10)
         jmp     _return_T_done\@
         jmp     _return_T_done\@
 
 
 _T_16\@:
 _T_16\@:
@@ -1631,41 +1702,73 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
 
 
 .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
 .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
 	i = (8-\num_initial_blocks)
 	i = (8-\num_initial_blocks)
+	j = 0
 	setreg
 	setreg
 
 
-        mov     arg6, %r10                       # r10 = AAD
-        mov     arg7, %r12                       # r12 = aadLen
-
-
-        mov     %r12, %r11
-
-        vpxor   reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-        vmovd   (%r10), \T1
-        vpslldq $12, \T1, \T1
-        vpsrldq $4, reg_i, reg_i
-        vpxor   \T1, reg_i, reg_i
-
-        add     $4, %r10
-        sub     $4, %r12
-        jg      _get_AAD_loop\@
-
-
-        cmp     $16, %r11
-        je      _get_AAD_loop2_done\@
-        mov     $16, %r12
-
-_get_AAD_loop2\@:
-        vpsrldq $4, reg_i, reg_i
-        sub     $4, %r12
-        cmp     %r11, %r12
-        jg      _get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
-        #byte-reflect the AAD data
-        vpshufb SHUF_MASK(%rip), reg_i, reg_i
-
+	mov     arg6, %r10                       # r10 = AAD
+	mov     arg7, %r12                       # r12 = aadLen
+
+
+	mov     %r12, %r11
+
+	vpxor   reg_j, reg_j, reg_j
+	vpxor   reg_i, reg_i, reg_i
+
+	cmp     $16, %r11
+	jl      _get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu (%r10), reg_i
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_i, reg_j, reg_j
+	GHASH_MUL_AVX2      reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add     $16, %r10
+	sub     $16, %r12
+	sub     $16, %r11
+	cmp     $16, %r11
+	jge     _get_AAD_blocks\@
+	vmovdqu reg_j, reg_i
+	cmp     $0, %r11
+	je      _get_AAD_done\@
+
+	vpxor   reg_i, reg_i, reg_i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp     $4, %r11
+	jle     _get_AAD_rest4\@
+	movq    (%r10), \T1
+	add     $8, %r10
+	sub     $8, %r11
+	vpslldq $8, \T1, \T1
+	vpsrldq $8, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+	jmp     _get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp     $0, %r11
+	jle     _get_AAD_rest0\@
+	mov     (%r10), %eax
+	movq    %rax, \T1
+	add     $4, %r10
+	sub     $4, %r11
+	vpslldq $12, \T1, \T1
+	vpsrldq $4, reg_i, reg_i
+	vpxor   \T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq    %r12, %r11
+	salq    $4, %r11
+	movdqu  aad_shift_arr(%r11), \T1
+	vpshufb \T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+	vpshufb SHUF_MASK(%rip), reg_i, reg_i
+	vpxor   reg_j, reg_i, reg_i
+	GHASH_MUL_AVX2      reg_i, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
 	# initialize the data pointer offset as zero
 	# initialize the data pointer offset as zero
 	xor     %r11, %r11
 	xor     %r11, %r11
 
 
@@ -1740,7 +1843,6 @@ _get_AAD_loop2_done\@:
 	i = (8-\num_initial_blocks)
 	i = (8-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	j = (9-\num_initial_blocks)
 	setreg
 	setreg
-        GHASH_MUL_AVX2       reg_i, \T2, \T1, \T3, \T4, \T5, \T6
 
 
 .rep \num_initial_blocks
 .rep \num_initial_blocks
         vpxor    reg_i, reg_j, reg_j
         vpxor    reg_i, reg_j, reg_j
@@ -2702,19 +2804,36 @@ _return_T\@:
         cmp     $16, %r11
         cmp     $16, %r11
         je      _T_16\@
         je      _T_16\@
 
 
-        cmp     $12, %r11
-        je      _T_12\@
+        cmp     $8, %r11
+        jl      _T_4\@
 
 
 _T_8\@:
 _T_8\@:
         vmovq   %xmm9, %rax
         vmovq   %xmm9, %rax
         mov     %rax, (%r10)
         mov     %rax, (%r10)
-        jmp     _return_T_done\@
-_T_12\@:
-        vmovq   %xmm9, %rax
-        mov     %rax, (%r10)
+        add     $8, %r10
+        sub     $8, %r11
         vpsrldq $8, %xmm9, %xmm9
         vpsrldq $8, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_4\@:
         vmovd   %xmm9, %eax
         vmovd   %xmm9, %eax
-        mov     %eax, 8(%r10)
+        mov     %eax, (%r10)
+        add     $4, %r10
+        sub     $4, %r11
+        vpsrldq     $4, %xmm9, %xmm9
+        cmp     $0, %r11
+        je     _return_T_done\@
+_T_123\@:
+        vmovd     %xmm9, %eax
+        cmp     $2, %r11
+        jl     _T_1\@
+        mov     %ax, (%r10)
+        cmp     $2, %r11
+        je     _return_T_done\@
+        add     $2, %r10
+        sar     $16, %eax
+_T_1\@:
+        mov     %al, (%r10)
         jmp     _return_T_done\@
         jmp     _return_T_done\@
 
 
 _T_16\@:
 _T_16\@:

+ 158 - 50
arch/x86/crypto/aesni-intel_glue.c

@@ -61,6 +61,11 @@ struct aesni_rfc4106_gcm_ctx {
 	u8 nonce[4];
 	u8 nonce[4];
 };
 };
 
 
+struct generic_gcmaes_ctx {
+	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
+};
+
 struct aesni_xts_ctx {
 struct aesni_xts_ctx {
 	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
@@ -102,13 +107,11 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
  * u8 *out, Ciphertext output. Encrypt in-place is allowed.
  * u8 *out, Ciphertext output. Encrypt in-place is allowed.
  * const u8 *in, Plaintext input
  * const u8 *in, Plaintext input
  * unsigned long plaintext_len, Length of data in bytes for encryption.
  * unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- *         concatenated with 8 byte Initialisation Vector (from IPSec ESP
- *         Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ *         16-byte aligned pointer.
  * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
  * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
  * const u8 *aad, Additional Authentication Data (AAD)
  * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
- *          is going to be 8 or 12 bytes
+ * unsigned long aad_len, Length of AAD in bytes.
  * u8 *auth_tag, Authenticated Tag output.
  * u8 *auth_tag, Authenticated Tag output.
  * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
  * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
  *          Valid values are 16 (most likely), 12 or 8.
  *          Valid values are 16 (most likely), 12 or 8.
@@ -123,9 +126,8 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
  * u8 *out, Plaintext output. Decrypt in-place is allowed.
  * u8 *out, Plaintext output. Decrypt in-place is allowed.
  * const u8 *in, Ciphertext input
  * const u8 *in, Ciphertext input
  * unsigned long ciphertext_len, Length of data in bytes for decryption.
  * unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- *         concatenated with 8 byte Initialisation Vector (from IPSec ESP
- *         Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ *         16-byte aligned pointer.
  * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
  * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
  * const u8 *aad, Additional Authentication Data (AAD)
  * const u8 *aad, Additional Authentication Data (AAD)
  * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
  * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
@@ -275,6 +277,16 @@ aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 		align = 1;
 		align = 1;
 	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
 	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
 }
 }
+
+static inline struct
+generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
+{
+	unsigned long align = AESNI_ALIGN;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
+}
 #endif
 #endif
 
 
 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
@@ -712,32 +724,34 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
 	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
 	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
 }
 }
 
 
-static int helper_rfc4106_encrypt(struct aead_request *req)
+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
+				       unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
+			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
-	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
-	unsigned int i;
-
-	/* Assuming we are supporting rfc4106 64-bit extended */
-	/* sequence numbers We need to have the AAD length equal */
-	/* to 16 or 20 bytes */
-	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
-		return -EINVAL;
-
-	/* IV below built */
-	for (i = 0; i < 4; i++)
-		*(iv+i) = ctx->nonce[i];
-	for (i = 0; i < 8; i++)
-		*(iv+4+i) = req->iv[i];
-	*((__be32 *)(iv+12)) = counter;
 
 
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
@@ -768,7 +782,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 
 
 	kernel_fpu_begin();
 	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
-			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  hash_subkey, assoc, assoclen,
 			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
 
 
@@ -791,37 +805,20 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 	return 0;
 }
 
 
-static int helper_rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
+			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
 	unsigned long tempCipherLen = 0;
-	__be32 counter = cpu_to_be32(1);
-	int retval = 0;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
-	unsigned int i;
-
-	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
-		return -EINVAL;
-
-	/* Assuming we are supporting rfc4106 64-bit extended */
-	/* sequence numbers We need to have the AAD length */
-	/* equal to 16 or 20 bytes */
+	int retval = 0;
 
 
 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
-	/* IV below built */
-	for (i = 0; i < 4; i++)
-		*(iv+i) = ctx->nonce[i];
-	for (i = 0; i < 8; i++)
-		*(iv+4+i) = req->iv[i];
-	*((__be32 *)(iv+12)) = counter;
 
 
 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
@@ -838,7 +835,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 			scatterwalk_start(&dst_sg_walk, req->dst);
 			dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
 		}
-
 	} else {
 		/* Allocate memory for src, dst, assoc */
 		assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
@@ -850,9 +846,10 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 		dst = src;
 	}
 
 
+
 	kernel_fpu_begin();
 	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
-			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  hash_subkey, assoc, assoclen,
 			  authTag, auth_tag_len);
 	kernel_fpu_end();
 
 
@@ -875,6 +872,60 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 		kfree(assoc);
 	}
 	return retval;
+
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	unsigned int i;
+	__be32 counter = cpu_to_be32(1);
+
+	/* Assuming we are supporting rfc4106 64-bit extended */
+	/* sequence numbers We need to have the AAD length equal */
+	/* to 16 or 20 bytes */
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+		return -EINVAL;
+
+	/* IV below built */
+	for (i = 0; i < 4; i++)
+		*(iv+i) = ctx->nonce[i];
+	for (i = 0; i < 8; i++)
+		*(iv+4+i) = req->iv[i];
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+	__be32 counter = cpu_to_be32(1);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	unsigned int i;
+
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+		return -EINVAL;
+
+	/* Assuming we are supporting rfc4106 64-bit extended */
+	/* sequence numbers We need to have the AAD length */
+	/* equal to 16 or 20 bytes */
+
+	/* IV below built */
+	for (i = 0; i < 4; i++)
+		*(iv+i) = ctx->nonce[i];
+	for (i = 0; i < 8; i++)
+		*(iv+4+i) = req->iv[i];
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+			      aes_ctx);
 }
 
 static int rfc4106_encrypt(struct aead_request *req)
@@ -1035,6 +1086,46 @@ struct {
 };
 
 #ifdef CONFIG_X86_64
+static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
+				  unsigned int key_len)
+{
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
+
+	return aes_set_key_common(crypto_aead_tfm(aead),
+				  &ctx->aes_key_expanded, key, key_len) ?:
+	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+}
+
+static int generic_gcmaes_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	__be32 counter = cpu_to_be32(1);
+
+	memcpy(iv, req->iv, 12);
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
+static int generic_gcmaes_decrypt(struct aead_request *req)
+{
+	__be32 counter = cpu_to_be32(1);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+
+	memcpy(iv, req->iv, 12);
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
 static struct aead_alg aesni_aead_algs[] = { {
 	.setkey			= common_rfc4106_set_key,
 	.setauthsize		= common_rfc4106_set_authsize,
@@ -1069,6 +1160,23 @@ static struct aead_alg aesni_aead_algs[] = { {
 		.cra_ctxsize		= sizeof(struct cryptd_aead *),
 		.cra_module		= THIS_MODULE,
 	},
+}, {
+	.setkey			= generic_gcmaes_set_key,
+	.setauthsize		= generic_gcmaes_set_authsize,
+	.encrypt		= generic_gcmaes_encrypt,
+	.decrypt		= generic_gcmaes_decrypt,
+	.ivsize			= 12,
+	.maxauthsize		= 16,
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "generic-gcm-aesni",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
 } };
 #else
 static struct aead_alg aesni_aead_algs[0];
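A minimal usage sketch, not part of this patch, of how a kernel caller might exercise the new "gcm(aes)" AEAD registration above. All names below are hypothetical, the sketch assumes the crypto_wait_req()/DECLARE_CRYPTO_WAIT() synchronous-wait helpers, and the buffer layout (associated data, then plaintext, with the 16-byte tag appended in place) follows the usual AEAD API convention:

/*
 * Hypothetical example, not from this series: one-shot gcm(aes) encryption
 * through the AEAD API.  buf holds the associated data followed by the
 * plaintext; the 16-byte tag is written after the ciphertext in place.
 */
#include <crypto/aead.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/string.h>

static int gcm_aes_encrypt_once(const u8 *key, unsigned int keylen,
				const u8 iv96[12], u8 *buf,
				unsigned int assoclen, unsigned int ptlen)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);	/* assumes the crypto_wait_req() helpers */
	u8 iv[12];
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen) ?:
	      crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	memcpy(iv, iv96, sizeof(iv));
	sg_init_one(&sg, buf, assoclen + ptlen + 16);
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, &sg, &sg, ptlen, iv);
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  crypto_req_done, &wait);

	err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}

crypto_alloc_aead("gcm(aes)", 0, 0) simply picks the highest-priority provider, so the same sketch works whether this driver or the generic gcm() template ends up selected.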

+ 0 - 3
arch/x86/crypto/glue_helper.c

@@ -176,9 +176,6 @@ __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
 				src -= 1;
 				dst -= 1;
 			} while (nbytes >= func_bytes);
-
-			if (nbytes < bsize)
-				goto done;
 		}
 	}
 
 

+ 4 - 3
arch/x86/crypto/sha512-mb/sha512_mb.c

@@ -269,19 +269,19 @@ static struct sha512_hash_ctx
 		 * LAST
 		 */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
+		goto unlock;
 	}
 
 	if (ctx->status & HASH_CTX_STS_PROCESSING) {
 		/* Cannot submit to a currently processing job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
+		goto unlock;
 	}
 
 	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
+		goto unlock;
 	}
 
 
 
 
@@ -363,6 +363,7 @@ static struct sha512_hash_ctx
 	}
 
 	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
+unlock:
 	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
 	return ctx;
 }

+ 1 - 0
crypto/Kconfig

@@ -130,6 +130,7 @@ config CRYPTO_DH
 config CRYPTO_ECDH
 	tristate "ECDH algorithm"
 	select CRYTPO_KPP
+	select CRYPTO_RNG_DEFAULT
 	help
 	  Generic implementation of the ECDH algorithm
 
 

+ 5 - 4
crypto/Makefile

@@ -33,10 +33,6 @@ obj-$(CONFIG_CRYPTO_KPP2) += kpp.o
 dh_generic-y := dh.o
 dh_generic-y += dh_helper.o
 obj-$(CONFIG_CRYPTO_DH) += dh_generic.o
-ecdh_generic-y := ecc.o
-ecdh_generic-y += ecdh.o
-ecdh_generic-y += ecdh_helper.o
-obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
 
 
 $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h
 $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h
@@ -138,6 +134,11 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
 
 
+ecdh_generic-y := ecc.o
+ecdh_generic-y += ecdh.o
+ecdh_generic-y += ecdh_helper.o
+obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
+
 #
 # generic algorithms and the async_tx api
 #

+ 1 - 1
crypto/aes_ti.c

@@ -114,7 +114,7 @@ static u32 mix_columns(u32 x)
 	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
 	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
 	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
-	 * | 0x3 0x1 0x1 0x3 |   | x[3] |
+	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
 	 */
 	u32 y = mul_by_x(x) ^ ror32(x, 16);
 
 

+ 1 - 1
crypto/algapi.c

@@ -260,7 +260,7 @@ void crypto_alg_tested(const char *name, int err)
 			goto found;
 	}
 
 
-	printk(KERN_ERR "alg: Unexpected test result for %s: %d\n", name, err);
+	pr_err("alg: Unexpected test result for %s: %d\n", name, err);
 	goto unlock;
 
 found:

+ 13 - 10
crypto/crypto_engine.c

@@ -70,7 +70,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 
 		if (engine->unprepare_crypt_hardware &&
 		    engine->unprepare_crypt_hardware(engine))
-			pr_err("failed to unprepare crypt hardware\n");
+			dev_err(engine->dev, "failed to unprepare crypt hardware\n");
 
 
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->idling = false;
@@ -99,7 +99,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 	if (!was_busy && engine->prepare_crypt_hardware) {
 		ret = engine->prepare_crypt_hardware(engine);
 		if (ret) {
-			pr_err("failed to prepare crypt hardware\n");
+			dev_err(engine->dev, "failed to prepare crypt hardware\n");
 			goto req_err;
 		}
 	}
@@ -110,14 +110,15 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		if (engine->prepare_hash_request) {
 			ret = engine->prepare_hash_request(engine, hreq);
 			if (ret) {
-				pr_err("failed to prepare request: %d\n", ret);
+				dev_err(engine->dev, "failed to prepare request: %d\n",
+					ret);
 				goto req_err;
 			}
 			engine->cur_req_prepared = true;
 		}
 		ret = engine->hash_one_request(engine, hreq);
 		if (ret) {
-			pr_err("failed to hash one request from queue\n");
+			dev_err(engine->dev, "failed to hash one request from queue\n");
 			goto req_err;
 		}
 		return;
@@ -126,19 +127,20 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		if (engine->prepare_cipher_request) {
 			ret = engine->prepare_cipher_request(engine, breq);
 			if (ret) {
-				pr_err("failed to prepare request: %d\n", ret);
+				dev_err(engine->dev, "failed to prepare request: %d\n",
+					ret);
 				goto req_err;
 			}
 			engine->cur_req_prepared = true;
 		}
 		ret = engine->cipher_one_request(engine, breq);
 		if (ret) {
-			pr_err("failed to cipher one request from queue\n");
+			dev_err(engine->dev, "failed to cipher one request from queue\n");
 			goto req_err;
 		}
 		return;
 	default:
-		pr_err("failed to prepare request of unknown type\n");
+		dev_err(engine->dev, "failed to prepare request of unknown type\n");
 		return;
 	}
 
 
@@ -275,7 +277,7 @@ void crypto_finalize_cipher_request(struct crypto_engine *engine,
 		    engine->unprepare_cipher_request) {
 			ret = engine->unprepare_cipher_request(engine, req);
 			if (ret)
-				pr_err("failed to unprepare request\n");
+				dev_err(engine->dev, "failed to unprepare request\n");
 		}
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->cur_req = NULL;
@@ -312,7 +314,7 @@ void crypto_finalize_hash_request(struct crypto_engine *engine,
 		    engine->unprepare_hash_request) {
 			ret = engine->unprepare_hash_request(engine, req);
 			if (ret)
-				pr_err("failed to unprepare request\n");
+				dev_err(engine->dev, "failed to unprepare request\n");
 		}
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->cur_req = NULL;
@@ -384,7 +386,7 @@ int crypto_engine_stop(struct crypto_engine *engine)
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
 	if (ret)
-		pr_warn("could not stop engine\n");
+		dev_warn(engine->dev, "could not stop engine\n");
 
 
 	return ret;
 }
@@ -411,6 +413,7 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
 	if (!engine)
 		return NULL;
 
 
+	engine->dev = dev;
 	engine->rt = rt;
 	engine->running = false;
 	engine->busy = false;
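The pr_err()/pr_warn() to dev_err()/dev_warn() conversion relies on the new engine->dev field recorded by crypto_engine_alloc_init(); a hedged sketch of how a driver's probe path might supply it (driver name and structure are hypothetical):

/* Hypothetical probe fragment: pass the device so the engine's
 * dev_err()/dev_warn() messages have something to print against. */
#include <crypto/engine.h>
#include <linux/platform_device.h>

static int myaccel_probe(struct platform_device *pdev)
{
	struct crypto_engine *engine;
	int ret;

	engine = crypto_engine_alloc_init(&pdev->dev, true /* rt pump thread */);
	if (!engine)
		return -ENOMEM;

	ret = crypto_engine_start(engine);
	if (ret) {
		crypto_engine_exit(engine);
		return ret;
	}

	platform_set_drvdata(pdev, engine);
	return 0;
}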

+ 6 - 3
crypto/dh.c

@@ -4,9 +4,9 @@
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/module.h>
@@ -85,6 +85,9 @@ static int dh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	struct dh_ctx *ctx = dh_get_ctx(tfm);
 	struct dh params;
 
 
+	/* Free the old MPI key if any */
+	dh_free_ctx(ctx);
+
 	if (crypto_dh_decode_key(buf, len, &params) < 0)
 		return -EINVAL;
 
 
@@ -144,7 +147,7 @@ err_free_val:
 	return ret;
 }
 
 
-static int dh_max_size(struct crypto_kpp *tfm)
+static unsigned int dh_max_size(struct crypto_kpp *tfm)
 {
 	struct dh_ctx *ctx = dh_get_ctx(tfm);
 
 

+ 2 - 2
crypto/dh_helper.c

@@ -3,9 +3,9 @@
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  *
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
 #include <linux/kernel.h>
 #include <linux/export.h>

+ 1 - 0
crypto/drbg.c

@@ -1691,6 +1691,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
 		return PTR_ERR(sk_tfm);
 	}
 	drbg->ctr_handle = sk_tfm;
+	init_completion(&drbg->ctr_completion);
 
 
 	req = skcipher_request_alloc(sk_tfm, GFP_KERNEL);
 	if (!req) {

+ 70 - 18
crypto/ecc.c

@@ -29,6 +29,7 @@
 #include <linux/swab.h>
 #include <linux/fips.h>
 #include <crypto/ecdh.h>
+#include <crypto/rng.h>
 
 
 #include "ecc.h"
 #include "ecc_curve_defs.h"
@@ -904,7 +905,7 @@ static inline void ecc_swap_digits(const u64 *in, u64 *out,
 }
 
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
-		     const u8 *private_key, unsigned int private_key_len)
+		     const u64 *private_key, unsigned int private_key_len)
 {
 	int nbytes;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
@@ -917,24 +918,77 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 	if (private_key_len != nbytes)
 		return -EINVAL;
 
 
-	if (vli_is_zero((const u64 *)&private_key[0], ndigits))
+	if (vli_is_zero(private_key, ndigits))
 		return -EINVAL;
 
 	/* Make sure the private key is in the range [1, n-1]. */
-	if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1)
+	if (vli_cmp(curve->n, private_key, ndigits) != 1)
 		return -EINVAL;
 
 	return 0;
 }
 
 
-int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
-		      const u8 *private_key, unsigned int private_key_len,
-		      u8 *public_key, unsigned int public_key_len)
+/*
+ * ECC private keys are generated using the method of extra random bits,
+ * equivalent to that described in FIPS 186-4, Appendix B.4.1.
+ *
+ * d = (c mod(n–1)) + 1    where c is a string of random bits, 64 bits longer
+ *                         than requested
+ * 0 <= c mod(n-1) <= n-2  and implies that
+ * 1 <= d <= n-1
+ *
+ * This method generates a private key uniformly distributed in the range
+ * [1, n-1].
+ */
+int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
+{
+	const struct ecc_curve *curve = ecc_get_curve(curve_id);
+	u64 priv[ndigits];
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	unsigned int nbits = vli_num_bits(curve->n, ndigits);
+	int err;
+
+	/* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
+	if (nbits < 160)
+		return -EINVAL;
+
+	/*
+	 * FIPS 186-4 recommends that the private key should be obtained from a
+	 * RBG with a security strength equal to or greater than the security
+	 * strength associated with N.
+	 *
+	 * The maximum security strength identified by NIST SP800-57pt1r4 for
+	 * ECC is 256 (N >= 512).
+	 *
+	 * This condition is met by the default RNG because it selects a favored
+	 * DRBG with a security strength of 256.
+	 */
+	if (crypto_get_default_rng())
+		err = -EFAULT;
+
+	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
+	crypto_put_default_rng();
+	if (err)
+		return err;
+
+	if (vli_is_zero(priv, ndigits))
+		return -EINVAL;
+
+	/* Make sure the private key is in the range [1, n-1]. */
+	if (vli_cmp(curve->n, priv, ndigits) != 1)
+		return -EINVAL;
+
+	ecc_swap_digits(priv, privkey, ndigits);
+
+	return 0;
+}
+
+int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
+		     const u64 *private_key, u64 *public_key)
 {
 	int ret = 0;
 	struct ecc_point *pk;
 	u64 priv[ndigits];
-	unsigned int nbytes;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
 	if (!private_key || !curve) {
@@ -942,7 +996,7 @@ int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
 		goto out;
 	}
 
 
-	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
+	ecc_swap_digits(private_key, priv, ndigits);
 
 
 	pk = ecc_alloc_point(ndigits);
 	if (!pk) {
@@ -956,9 +1010,8 @@ int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
 		goto err_free_point;
 	}
 
 
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
-	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
-	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);
+	ecc_swap_digits(pk->x, public_key, ndigits);
+	ecc_swap_digits(pk->y, &public_key[ndigits], ndigits);
 
 
 err_free_point:
 	ecc_free_point(pk);
@@ -967,9 +1020,8 @@ out:
 }
 
 int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
-		       const u8 *private_key, unsigned int private_key_len,
-		       const u8 *public_key, unsigned int public_key_len,
-		       u8 *secret, unsigned int secret_len)
+			      const u64 *private_key, const u64 *public_key,
+			      u64 *secret)
 {
 	int ret = 0;
 	struct ecc_point *product, *pk;
@@ -999,13 +1051,13 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 		goto err_alloc_product;
 	}
 
 
-	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
-	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
-	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
+	ecc_swap_digits(public_key, pk->x, ndigits);
+	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
+	ecc_swap_digits(private_key, priv, ndigits);
 
 
 	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);
 
 
-	ecc_swap_digits(product->x, (u64 *)secret, ndigits);
+	ecc_swap_digits(product->x, secret, ndigits);
 
 
 	if (ecc_point_is_zero(product))
 		ret = -EFAULT;

+ 25 - 16
crypto/ecc.h

@@ -34,41 +34,51 @@
  * ecc_is_key_valid() - Validate a given ECDH private key
  * ecc_is_key_valid() - Validate a given ECDH private key
  *
  *
  * @curve_id:		id representing the curve to use
  * @curve_id:		id representing the curve to use
- * @ndigits:		curve number of digits
+ * @ndigits:		curve's number of digits
  * @private_key:	private key to be used for the given curve
  * @private_key:	private key to be used for the given curve
- * @private_key_len:	private key len
+ * @private_key_len:	private key length
  *
  *
  * Returns 0 if the key is acceptable, a negative value otherwise
  * Returns 0 if the key is acceptable, a negative value otherwise
  */
  */
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
-		     const u8 *private_key, unsigned int private_key_len);
+		     const u64 *private_key, unsigned int private_key_len);
+
+/**
+ * ecc_gen_privkey() -  Generates an ECC private key.
+ * The private key is a random integer in the range 0 < random < n, where n is a
+ * prime that is the order of the cyclic subgroup generated by the distinguished
+ * point G.
+ * @curve_id:		id representing the curve to use
+ * @ndigits:		curve number of digits
+ * @private_key:	buffer for storing the generated private key
+ *
+ * Returns 0 if the private key was generated successfully, a negative value
+ * if an error occurred.
+ */
+int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey);
 
 
 /**
 /**
- * ecdh_make_pub_key() - Compute an ECC public key
+ * ecc_make_pub_key() - Compute an ECC public key
  *
  *
  * @curve_id:		id representing the curve to use
  * @curve_id:		id representing the curve to use
+ * @ndigits:		curve's number of digits
  * @private_key:	pregenerated private key for the given curve
  * @private_key:	pregenerated private key for the given curve
- * @private_key_len:	length of private_key
- * @public_key:		buffer for storing the public key generated
- * @public_key_len:	length of the public_key buffer
+ * @public_key:		buffer for storing the generated public key
  *
  *
  * Returns 0 if the public key was generated successfully, a negative value
  * Returns 0 if the public key was generated successfully, a negative value
  * if an error occurred.
  * if an error occurred.
  */
  */
-int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
-		      const u8 *private_key, unsigned int private_key_len,
-		      u8 *public_key, unsigned int public_key_len);
+int ecc_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
+		     const u64 *private_key, u64 *public_key);
 
 
 /**
 /**
  * crypto_ecdh_shared_secret() - Compute a shared secret
  * crypto_ecdh_shared_secret() - Compute a shared secret
  *
  *
  * @curve_id:		id representing the curve to use
  * @curve_id:		id representing the curve to use
+ * @ndigits:		curve's number of digits
  * @private_key:	private key of part A
  * @private_key:	private key of part A
- * @private_key_len:	length of private_key
  * @public_key:		public key of counterpart B
  * @public_key:		public key of counterpart B
- * @public_key_len:	length of public_key
  * @secret:		buffer for storing the calculated shared secret
  * @secret:		buffer for storing the calculated shared secret
- * @secret_len:		length of the secret buffer
  *
  *
  * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret
  * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret
  * before using it for symmetric encryption or HMAC.
  * before using it for symmetric encryption or HMAC.
@@ -77,7 +87,6 @@ int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
  * if an error occurred.
  * if an error occurred.
  */
  */
 int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
 int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
-		       const u8 *private_key, unsigned int private_key_len,
-		       const u8 *public_key, unsigned int public_key_len,
-		       u8 *secret, unsigned int secret_len);
+			      const u64 *private_key, const u64 *public_key,
+			      u64 *secret);
 #endif
 #endif

+ 15 - 14
crypto/ecdh.c

@@ -4,9 +4,9 @@
  * Authors: Salvator Benedetto <salvatore.benedetto@intel.com>
  * Authors: Salvator Benedetto <salvatore.benedetto@intel.com>
  *
  *
  * This program is free software; you can redistribute it and/or
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
  */
 
 
 #include <linux/module.h>
 #include <linux/module.h>
@@ -55,8 +55,12 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 	ctx->curve_id = params.curve_id;
 	ctx->curve_id = params.curve_id;
 	ctx->ndigits = ndigits;
 	ctx->ndigits = ndigits;
 
 
+	if (!params.key || !params.key_size)
+		return ecc_gen_privkey(ctx->curve_id, ctx->ndigits,
+				       ctx->private_key);
+
 	if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits,
 	if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits,
-			     (const u8 *)params.key, params.key_size) < 0)
+			     (const u64 *)params.key, params.key_size) < 0)
 		return -EINVAL;
 		return -EINVAL;
 
 
 	memcpy(ctx->private_key, params.key, params.key_size);
 	memcpy(ctx->private_key, params.key, params.key_size);
@@ -81,16 +85,14 @@ static int ecdh_compute_value(struct kpp_request *req)
 			return -EINVAL;
 			return -EINVAL;
 
 
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
 		ret = crypto_ecdh_shared_secret(ctx->curve_id, ctx->ndigits,
-					 (const u8 *)ctx->private_key, nbytes,
-					 (const u8 *)ctx->public_key, 2 * nbytes,
-					 (u8 *)ctx->shared_secret, nbytes);
+						ctx->private_key,
+						ctx->public_key,
+						ctx->shared_secret);
 
 
 		buf = ctx->shared_secret;
 		buf = ctx->shared_secret;
 	} else {
 	} else {
-		ret = ecdh_make_pub_key(ctx->curve_id, ctx->ndigits,
-					(const u8 *)ctx->private_key, nbytes,
-					(u8 *)ctx->public_key,
-					sizeof(ctx->public_key));
+		ret = ecc_make_pub_key(ctx->curve_id, ctx->ndigits,
+				       ctx->private_key, ctx->public_key);
 		buf = ctx->public_key;
 		buf = ctx->public_key;
 		/* Public part is a point thus it has both coordinates */
 		/* Public part is a point thus it has both coordinates */
 		nbytes *= 2;
 		nbytes *= 2;
@@ -106,13 +108,12 @@ static int ecdh_compute_value(struct kpp_request *req)
 	return ret;
 	return ret;
 }
 }
 
 
-static int ecdh_max_size(struct crypto_kpp *tfm)
+static unsigned int ecdh_max_size(struct crypto_kpp *tfm)
 {
 {
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
 	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
-	int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
 
 
-	/* Public key is made of two coordinates */
-	return 2 * nbytes;
+	/* Public key is made of two coordinates, add one to the left shift */
+	return ctx->ndigits << (ECC_DIGITS_TO_BYTES_SHIFT + 1);
 }
 }
 
 
 static void no_exit_tfm(struct crypto_kpp *tfm)
 static void no_exit_tfm(struct crypto_kpp *tfm)
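With ecdh_set_secret() now falling back to ecc_gen_privkey() when no key bytes are supplied, a kpp user can ask the kernel to generate the private key by encoding a parameter blob with an empty key. A minimal sketch under that assumption (helper name is hypothetical; the tfm would come from crypto_alloc_kpp("ecdh", 0, 0)):

#include <crypto/ecdh.h>
#include <crypto/kpp.h>
#include <linux/slab.h>

static int ecdh_use_generated_key(struct crypto_kpp *tfm)
{
	struct ecdh p = {
		.curve_id = ECC_CURVE_NIST_P256,
		.key	  = NULL,	/* no key bytes ... */
		.key_size = 0,		/* ... so ecdh_set_secret() generates one */
	};
	unsigned int len = crypto_ecdh_key_len(&p);
	u8 *buf;
	int err;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	err = crypto_ecdh_encode_key(buf, len, &p) ?:
	      crypto_kpp_set_secret(tfm, buf, len);

	kzfree(buf);
	return err;
}

This mirrors the new genkey test vector added to testmgr.h further down, whose secret blob carries a curve_id but a zero key_size.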

+ 2 - 2
crypto/ecdh_helper.c

@@ -3,9 +3,9 @@
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
  *
  *
  * This program is free software; you can redistribute it and/or
  * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
+ * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
+ * 2 of the License, or (at your option) any later version.
  */
  */
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/export.h>

+ 3 - 2
crypto/hmac.c

@@ -16,6 +16,7 @@
  *
  *
  */
  */
 
 
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/err.h>
@@ -74,8 +75,8 @@ static int hmac_setkey(struct crypto_shash *parent,
 	memcpy(opad, ipad, bs);
 	memcpy(opad, ipad, bs);
 
 
 	for (i = 0; i < bs; i++) {
 	for (i = 0; i < bs; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}
 	}
 
 
 	return crypto_shash_init(shash) ?:
 	return crypto_shash_init(shash) ?:
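For reference, the bytes now named in <crypto/hmac.h> are the standard RFC 2104 pads: HMAC(K, m) = H((K' XOR opad) || H((K' XOR ipad) || m)), where K' is the key zero-padded (or first hashed, if longer than one block) to the block size bs, ipad is 0x36 repeated bs times and opad is 0x5c repeated bs times; that is exactly the loop shown above.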

+ 0 - 5
crypto/rng.c

@@ -33,11 +33,6 @@ struct crypto_rng *crypto_default_rng;
 EXPORT_SYMBOL_GPL(crypto_default_rng);
 EXPORT_SYMBOL_GPL(crypto_default_rng);
 static int crypto_default_rng_refcnt;
 static int crypto_default_rng_refcnt;
 
 
-static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm)
-{
-	return container_of(tfm, struct crypto_rng, base);
-}
-
 int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 {
 {
 	u8 *buf = NULL;
 	u8 *buf = NULL;

+ 3 - 9
crypto/rsa-pkcs1pad.c

@@ -120,9 +120,6 @@ static int pkcs1pad_set_pub_key(struct crypto_akcipher *tfm, const void *key,
 
 
 	/* Find out new modulus size from rsa implementation */
 	/* Find out new modulus size from rsa implementation */
 	err = crypto_akcipher_maxsize(ctx->child);
 	err = crypto_akcipher_maxsize(ctx->child);
-	if (err < 0)
-		return err;
-
 	if (err > PAGE_SIZE)
 	if (err > PAGE_SIZE)
 		return -ENOTSUPP;
 		return -ENOTSUPP;
 
 
@@ -144,9 +141,6 @@ static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 
 
 	/* Find out new modulus size from rsa implementation */
 	/* Find out new modulus size from rsa implementation */
 	err = crypto_akcipher_maxsize(ctx->child);
 	err = crypto_akcipher_maxsize(ctx->child);
-	if (err < 0)
-		return err;
-
 	if (err > PAGE_SIZE)
 	if (err > PAGE_SIZE)
 		return -ENOTSUPP;
 		return -ENOTSUPP;
 
 
@@ -154,7 +148,7 @@ static int pkcs1pad_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	return 0;
 	return 0;
 }
 }
 
 
-static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm)
+static unsigned int pkcs1pad_get_max_size(struct crypto_akcipher *tfm)
 {
 {
 	struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
 
 
@@ -164,7 +158,7 @@ static int pkcs1pad_get_max_size(struct crypto_akcipher *tfm)
 	 * decrypt/verify.
 	 * decrypt/verify.
 	 */
 	 */
 
 
-	return ctx->key_size ?: -EINVAL;
+	return ctx->key_size;
 }
 }
 
 
 static void pkcs1pad_sg_set_buf(struct scatterlist *sg, void *buf, size_t len,
 static void pkcs1pad_sg_set_buf(struct scatterlist *sg, void *buf, size_t len,
@@ -496,7 +490,7 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 		goto done;
 		goto done;
 	pos++;
 	pos++;
 
 
-	if (memcmp(out_buf + pos, digest_info->data, digest_info->size))
+	if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size))
 		goto done;
 		goto done;
 
 
 	pos += digest_info->size;
 	pos += digest_info->size;

+ 2 - 2
crypto/rsa.c

@@ -337,11 +337,11 @@ err:
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
 
 
-static int rsa_max_size(struct crypto_akcipher *tfm)
+static unsigned int rsa_max_size(struct crypto_akcipher *tfm)
 {
 {
 	struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm);
 	struct rsa_mpi_key *pkey = akcipher_tfm_ctx(tfm);
 
 
-	return pkey->n ? mpi_get_size(pkey->n) : -EINVAL;
+	return mpi_get_size(pkey->n);
 }
 }
 
 
 static void rsa_exit_tfm(struct crypto_akcipher *tfm)
 static void rsa_exit_tfm(struct crypto_akcipher *tfm)

+ 0 - 4
crypto/tcrypt.c

@@ -138,8 +138,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 	int ret = 0;
 	int ret = 0;
 	int i;
 	int i;
 
 
-	local_irq_disable();
-
 	/* Warm-up run. */
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
 	for (i = 0; i < 4; i++) {
 		if (enc)
 		if (enc)
@@ -169,8 +167,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 	}
 	}
 
 
 out:
 out:
-	local_irq_enable();
-
 	if (ret == 0)
 	if (ret == 0)
 		printk("1 operation in %lu cycles (%d bytes)\n",
 		printk("1 operation in %lu cycles (%d bytes)\n",
 		       (cycles + 4) / 8, blen);
 		       (cycles + 4) / 8, blen);

+ 104 - 24
crypto/testmgr.c

@@ -218,14 +218,14 @@ static int ahash_partial_update(struct ahash_request **preq,
 			crypto_ahash_reqtfm(req));
 			crypto_ahash_reqtfm(req));
 	state = kmalloc(statesize + sizeof(guard), GFP_KERNEL);
 	state = kmalloc(statesize + sizeof(guard), GFP_KERNEL);
 	if (!state) {
 	if (!state) {
-		pr_err("alt: hash: Failed to alloc state for %s\n", algo);
+		pr_err("alg: hash: Failed to alloc state for %s\n", algo);
 		goto out_nostate;
 		goto out_nostate;
 	}
 	}
 	memcpy(state + statesize, guard, sizeof(guard));
 	memcpy(state + statesize, guard, sizeof(guard));
 	ret = crypto_ahash_export(req, state);
 	ret = crypto_ahash_export(req, state);
 	WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
 	WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
 	if (ret) {
 	if (ret) {
-		pr_err("alt: hash: Failed to export() for %s\n", algo);
+		pr_err("alg: hash: Failed to export() for %s\n", algo);
 		goto out;
 		goto out;
 	}
 	}
 	ahash_request_free(req);
 	ahash_request_free(req);
@@ -344,19 +344,19 @@ static int __test_hash(struct crypto_ahash *tfm,
 		} else {
 		} else {
 			ret = wait_async_op(&tresult, crypto_ahash_init(req));
 			ret = wait_async_op(&tresult, crypto_ahash_init(req));
 			if (ret) {
 			if (ret) {
-				pr_err("alt: hash: init failed on test %d "
+				pr_err("alg: hash: init failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 				goto out;
 			}
 			}
 			ret = wait_async_op(&tresult, crypto_ahash_update(req));
 			ret = wait_async_op(&tresult, crypto_ahash_update(req));
 			if (ret) {
 			if (ret) {
-				pr_err("alt: hash: update failed on test %d "
+				pr_err("alg: hash: update failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 				goto out;
 			}
 			}
 			ret = wait_async_op(&tresult, crypto_ahash_final(req));
 			ret = wait_async_op(&tresult, crypto_ahash_final(req));
 			if (ret) {
 			if (ret) {
-				pr_err("alt: hash: final failed on test %d "
+				pr_err("alg: hash: final failed on test %d "
 				       "for %s: ret=%d\n", j, algo, -ret);
 				       "for %s: ret=%d\n", j, algo, -ret);
 				goto out;
 				goto out;
 			}
 			}
@@ -488,13 +488,13 @@ static int __test_hash(struct crypto_ahash *tfm,
 		ahash_request_set_crypt(req, sg, result, template[i].tap[0]);
 		ahash_request_set_crypt(req, sg, result, template[i].tap[0]);
 		ret = wait_async_op(&tresult, crypto_ahash_init(req));
 		ret = wait_async_op(&tresult, crypto_ahash_init(req));
 		if (ret) {
 		if (ret) {
-			pr_err("alt: hash: init failed on test %d for %s: ret=%d\n",
+			pr_err("alg: hash: init failed on test %d for %s: ret=%d\n",
 				j, algo, -ret);
 				j, algo, -ret);
 			goto out;
 			goto out;
 		}
 		}
 		ret = wait_async_op(&tresult, crypto_ahash_update(req));
 		ret = wait_async_op(&tresult, crypto_ahash_update(req));
 		if (ret) {
 		if (ret) {
-			pr_err("alt: hash: update failed on test %d for %s: ret=%d\n",
+			pr_err("alg: hash: update failed on test %d for %s: ret=%d\n",
 				j, algo, -ret);
 				j, algo, -ret);
 			goto out;
 			goto out;
 		}
 		}
@@ -505,7 +505,7 @@ static int __test_hash(struct crypto_ahash *tfm,
 				hash_buff, k, temp, &sg[0], algo, result,
 				hash_buff, k, temp, &sg[0], algo, result,
 				&tresult);
 				&tresult);
 			if (ret) {
 			if (ret) {
-				pr_err("hash: partial update failed on test %d for %s: ret=%d\n",
+				pr_err("alg: hash: partial update failed on test %d for %s: ret=%d\n",
 					j, algo, -ret);
 					j, algo, -ret);
 				goto out_noreq;
 				goto out_noreq;
 			}
 			}
@@ -513,7 +513,7 @@ static int __test_hash(struct crypto_ahash *tfm,
 		}
 		}
 		ret = wait_async_op(&tresult, crypto_ahash_final(req));
 		ret = wait_async_op(&tresult, crypto_ahash_final(req));
 		if (ret) {
 		if (ret) {
-			pr_err("alt: hash: final failed on test %d for %s: ret=%d\n",
+			pr_err("alg: hash: final failed on test %d for %s: ret=%d\n",
 				j, algo, -ret);
 				j, algo, -ret);
 			goto out;
 			goto out;
 		}
 		}
@@ -1997,6 +1997,9 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 	struct kpp_request *req;
 	struct kpp_request *req;
 	void *input_buf = NULL;
 	void *input_buf = NULL;
 	void *output_buf = NULL;
 	void *output_buf = NULL;
+	void *a_public = NULL;
+	void *a_ss = NULL;
+	void *shared_secret = NULL;
 	struct tcrypt_result result;
 	struct tcrypt_result result;
 	unsigned int out_len_max;
 	unsigned int out_len_max;
 	int err = -ENOMEM;
 	int err = -ENOMEM;
@@ -2026,20 +2029,31 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				 tcrypt_complete, &result);
 				 tcrypt_complete, &result);
 
 
-	/* Compute public key */
+	/* Compute party A's public key */
 	err = wait_async_op(&result, crypto_kpp_generate_public_key(req));
 	err = wait_async_op(&result, crypto_kpp_generate_public_key(req));
 	if (err) {
 	if (err) {
-		pr_err("alg: %s: generate public key test failed. err %d\n",
+		pr_err("alg: %s: Party A: generate public key test failed. err %d\n",
 		       alg, err);
 		       alg, err);
 		goto free_output;
 		goto free_output;
 	}
 	}
-	/* Verify calculated public key */
-	if (memcmp(vec->expected_a_public, sg_virt(req->dst),
-		   vec->expected_a_public_size)) {
-		pr_err("alg: %s: generate public key test failed. Invalid output\n",
-		       alg);
-		err = -EINVAL;
-		goto free_output;
+
+	if (vec->genkey) {
+		/* Save party A's public key */
+		a_public = kzalloc(out_len_max, GFP_KERNEL);
+		if (!a_public) {
+			err = -ENOMEM;
+			goto free_output;
+		}
+		memcpy(a_public, sg_virt(req->dst), out_len_max);
+	} else {
+		/* Verify calculated public key */
+		if (memcmp(vec->expected_a_public, sg_virt(req->dst),
+			   vec->expected_a_public_size)) {
+			pr_err("alg: %s: Party A: generate public key test failed. Invalid output\n",
+			       alg);
+			err = -EINVAL;
+			goto free_output;
+		}
 	}
 	}
 
 
 	/* Calculate shared secret key by using counter part (b) public key. */
 	/* Calculate shared secret key by using counter part (b) public key. */
@@ -2058,15 +2072,53 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 				 tcrypt_complete, &result);
 				 tcrypt_complete, &result);
 	err = wait_async_op(&result, crypto_kpp_compute_shared_secret(req));
 	err = wait_async_op(&result, crypto_kpp_compute_shared_secret(req));
 	if (err) {
 	if (err) {
-		pr_err("alg: %s: compute shard secret test failed. err %d\n",
+		pr_err("alg: %s: Party A: compute shared secret test failed. err %d\n",
 		       alg, err);
 		       alg, err);
 		goto free_all;
 		goto free_all;
 	}
 	}
+
+	if (vec->genkey) {
+		/* Save the shared secret obtained by party A */
+		a_ss = kzalloc(vec->expected_ss_size, GFP_KERNEL);
+		if (!a_ss) {
+			err = -ENOMEM;
+			goto free_all;
+		}
+		memcpy(a_ss, sg_virt(req->dst), vec->expected_ss_size);
+
+		/*
+		 * Calculate party B's shared secret by using party A's
+		 * public key.
+		 */
+		err = crypto_kpp_set_secret(tfm, vec->b_secret,
+					    vec->b_secret_size);
+		if (err < 0)
+			goto free_all;
+
+		sg_init_one(&src, a_public, vec->expected_a_public_size);
+		sg_init_one(&dst, output_buf, out_len_max);
+		kpp_request_set_input(req, &src, vec->expected_a_public_size);
+		kpp_request_set_output(req, &dst, out_len_max);
+		kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					 tcrypt_complete, &result);
+		err = wait_async_op(&result,
+				    crypto_kpp_compute_shared_secret(req));
+		if (err) {
+			pr_err("alg: %s: Party B: compute shared secret failed. err %d\n",
+			       alg, err);
+			goto free_all;
+		}
+
+		shared_secret = a_ss;
+	} else {
+		shared_secret = (void *)vec->expected_ss;
+	}
+
 	/*
 	/*
 	 * verify shared secret from which the user will derive
 	 * verify shared secret from which the user will derive
 	 * secret key by executing whatever hash it has chosen
 	 * secret key by executing whatever hash it has chosen
 	 */
 	 */
-	if (memcmp(vec->expected_ss, sg_virt(req->dst),
+	if (memcmp(shared_secret, sg_virt(req->dst),
 		   vec->expected_ss_size)) {
 		   vec->expected_ss_size)) {
 		pr_err("alg: %s: compute shared secret test failed. Invalid output\n",
 		pr_err("alg: %s: compute shared secret test failed. Invalid output\n",
 		       alg);
 		       alg);
@@ -2074,8 +2126,10 @@ static int do_test_kpp(struct crypto_kpp *tfm, const struct kpp_testvec *vec,
 	}
 	}
 
 
 free_all:
 free_all:
+	kfree(a_ss);
 	kfree(input_buf);
 	kfree(input_buf);
 free_output:
 free_output:
+	kfree(a_public);
 	kfree(output_buf);
 	kfree(output_buf);
 free_req:
 free_req:
 	kpp_request_free(req);
 	kpp_request_free(req);
@@ -2168,8 +2222,11 @@ static int test_akcipher_one(struct crypto_akcipher *tfm,
 	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
 				      tcrypt_complete, &result);
 				      tcrypt_complete, &result);
 
 
-	/* Run RSA encrypt - c = m^e mod n;*/
-	err = wait_async_op(&result, crypto_akcipher_encrypt(req));
+	err = wait_async_op(&result, vecs->siggen_sigver_test ?
+				     /* Run asymmetric signature generation */
+				     crypto_akcipher_sign(req) :
+				     /* Run asymmetric encrypt */
+				     crypto_akcipher_encrypt(req));
 	if (err) {
 	if (err) {
 		pr_err("alg: akcipher: encrypt test failed. err %d\n", err);
 		pr_err("alg: akcipher: encrypt test failed. err %d\n", err);
 		goto free_all;
 		goto free_all;
@@ -2207,8 +2264,11 @@ static int test_akcipher_one(struct crypto_akcipher *tfm,
 	init_completion(&result.completion);
 	init_completion(&result.completion);
 	akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max);
 	akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max);
 
 
-	/* Run RSA decrypt - m = c^d mod n;*/
-	err = wait_async_op(&result, crypto_akcipher_decrypt(req));
+	err = wait_async_op(&result, vecs->siggen_sigver_test ?
+				     /* Run asymmetric signature verification */
+				     crypto_akcipher_verify(req) :
+				     /* Run asymmetric decrypt */
+				     crypto_akcipher_decrypt(req));
 	if (err) {
 	if (err) {
 		pr_err("alg: akcipher: decrypt test failed. err %d\n", err);
 		pr_err("alg: akcipher: decrypt test failed. err %d\n", err);
 		goto free_all;
 		goto free_all;
@@ -2306,6 +2366,7 @@ static const struct alg_test_desc alg_test_descs[] = {
 	}, {
 	}, {
 		.alg = "authenc(hmac(sha1),cbc(aes))",
 		.alg = "authenc(hmac(sha1),cbc(aes))",
 		.test = alg_test_aead,
 		.test = alg_test_aead,
+		.fips_allowed = 1,
 		.suite = {
 		.suite = {
 			.aead = {
 			.aead = {
 				.enc = __VECS(hmac_sha1_aes_cbc_enc_tv_temp)
 				.enc = __VECS(hmac_sha1_aes_cbc_enc_tv_temp)
@@ -3254,6 +3315,25 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.dec = __VECS(fcrypt_pcbc_dec_tv_template)
 				.dec = __VECS(fcrypt_pcbc_dec_tv_template)
 			}
 			}
 		}
 		}
+	}, {
+		.alg = "pkcs1pad(rsa,sha224)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
+		.alg = "pkcs1pad(rsa,sha256)",
+		.test = alg_test_akcipher,
+		.fips_allowed = 1,
+		.suite = {
+			.akcipher = __VECS(pkcs1pad_rsa_tv_template)
+		}
+	}, {
+		.alg = "pkcs1pad(rsa,sha384)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
+		.alg = "pkcs1pad(rsa,sha512)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
 	}, {
 	}, {
 		.alg = "poly1305",
 		.alg = "poly1305",
 		.test = alg_test_hash,
 		.test = alg_test_hash,

+ 143 - 0
crypto/testmgr.h

@@ -133,17 +133,21 @@ struct akcipher_testvec {
 	unsigned int m_size;
 	unsigned int m_size;
 	unsigned int c_size;
 	unsigned int c_size;
 	bool public_key_vec;
 	bool public_key_vec;
+	bool siggen_sigver_test;
 };
 };
 
 
 struct kpp_testvec {
 struct kpp_testvec {
 	const unsigned char *secret;
 	const unsigned char *secret;
+	const unsigned char *b_secret;
 	const unsigned char *b_public;
 	const unsigned char *b_public;
 	const unsigned char *expected_a_public;
 	const unsigned char *expected_a_public;
 	const unsigned char *expected_ss;
 	const unsigned char *expected_ss;
 	unsigned short secret_size;
 	unsigned short secret_size;
+	unsigned short b_secret_size;
 	unsigned short b_public_size;
 	unsigned short b_public_size;
 	unsigned short expected_a_public_size;
 	unsigned short expected_a_public_size;
 	unsigned short expected_ss_size;
 	unsigned short expected_ss_size;
+	bool genkey;
 };
 };
 
 
 static const char zeroed_string[48];
 static const char zeroed_string[48];
@@ -538,6 +542,101 @@ static const struct akcipher_testvec rsa_tv_template[] = {
 	}
 	}
 };
 };
 
 
+/*
+ * PKCS#1 RSA test vectors. Obtained from CAVS testing.
+ */
+static const struct akcipher_testvec pkcs1pad_rsa_tv_template[] = {
+	{
+	.key =
+	"\x30\x82\x03\x1f\x02\x01\x10\x02\x82\x01\x01\x00\xd7\x1e\x77\x82"
+	"\x8c\x92\x31\xe7\x69\x02\xa2\xd5\x5c\x78\xde\xa2\x0c\x8f\xfe\x28"
+	"\x59\x31\xdf\x40\x9c\x60\x61\x06\xb9\x2f\x62\x40\x80\x76\xcb\x67"
+	"\x4a\xb5\x59\x56\x69\x17\x07\xfa\xf9\x4c\xbd\x6c\x37\x7a\x46\x7d"
+	"\x70\xa7\x67\x22\xb3\x4d\x7a\x94\xc3\xba\x4b\x7c\x4b\xa9\x32\x7c"
+	"\xb7\x38\x95\x45\x64\xa4\x05\xa8\x9f\x12\x7c\x4e\xc6\xc8\x2d\x40"
+	"\x06\x30\xf4\x60\xa6\x91\xbb\x9b\xca\x04\x79\x11\x13\x75\xf0\xae"
+	"\xd3\x51\x89\xc5\x74\xb9\xaa\x3f\xb6\x83\xe4\x78\x6b\xcd\xf9\x5c"
+	"\x4c\x85\xea\x52\x3b\x51\x93\xfc\x14\x6b\x33\x5d\x30\x70\xfa\x50"
+	"\x1b\x1b\x38\x81\x13\x8d\xf7\xa5\x0c\xc0\x8e\xf9\x63\x52\x18\x4e"
+	"\xa9\xf9\xf8\x5c\x5d\xcd\x7a\x0d\xd4\x8e\x7b\xee\x91\x7b\xad\x7d"
+	"\xb4\x92\xd5\xab\x16\x3b\x0a\x8a\xce\x8e\xde\x47\x1a\x17\x01\x86"
+	"\x7b\xab\x99\xf1\x4b\x0c\x3a\x0d\x82\x47\xc1\x91\x8c\xbb\x2e\x22"
+	"\x9e\x49\x63\x6e\x02\xc1\xc9\x3a\x9b\xa5\x22\x1b\x07\x95\xd6\x10"
+	"\x02\x50\xfd\xfd\xd1\x9b\xbe\xab\xc2\xc0\x74\xd7\xec\x00\xfb\x11"
+	"\x71\xcb\x7a\xdc\x81\x79\x9f\x86\x68\x46\x63\x82\x4d\xb7\xf1\xe6"
+	"\x16\x6f\x42\x63\xf4\x94\xa0\xca\x33\xcc\x75\x13\x02\x82\x01\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x01"
+	"\x02\x82\x01\x00\x62\xb5\x60\x31\x4f\x3f\x66\x16\xc1\x60\xac\x47"
+	"\x2a\xff\x6b\x69\x00\x4a\xb2\x5c\xe1\x50\xb9\x18\x74\xa8\xe4\xdc"
+	"\xa8\xec\xcd\x30\xbb\xc1\xc6\xe3\xc6\xac\x20\x2a\x3e\x5e\x8b\x12"
+	"\xe6\x82\x08\x09\x38\x0b\xab\x7c\xb3\xcc\x9c\xce\x97\x67\xdd\xef"
+	"\x95\x40\x4e\x92\xe2\x44\xe9\x1d\xc1\x14\xfd\xa9\xb1\xdc\x71\x9c"
+	"\x46\x21\xbd\x58\x88\x6e\x22\x15\x56\xc1\xef\xe0\xc9\x8d\xe5\x80"
+	"\x3e\xda\x7e\x93\x0f\x52\xf6\xf5\xc1\x91\x90\x9e\x42\x49\x4f\x8d"
+	"\x9c\xba\x38\x83\xe9\x33\xc2\x50\x4f\xec\xc2\xf0\xa8\xb7\x6e\x28"
+	"\x25\x56\x6b\x62\x67\xfe\x08\xf1\x56\xe5\x6f\x0e\x99\xf1\xe5\x95"
+	"\x7b\xef\xeb\x0a\x2c\x92\x97\x57\x23\x33\x36\x07\xdd\xfb\xae\xf1"
+	"\xb1\xd8\x33\xb7\x96\x71\x42\x36\xc5\xa4\xa9\x19\x4b\x1b\x52\x4c"
+	"\x50\x69\x91\xf0\x0e\xfa\x80\x37\x4b\xb5\xd0\x2f\xb7\x44\x0d\xd4"
+	"\xf8\x39\x8d\xab\x71\x67\x59\x05\x88\x3d\xeb\x48\x48\x33\x88\x4e"
+	"\xfe\xf8\x27\x1b\xd6\x55\x60\x5e\x48\xb7\x6d\x9a\xa8\x37\xf9\x7a"
+	"\xde\x1b\xcd\x5d\x1a\x30\xd4\xe9\x9e\x5b\x3c\x15\xf8\x9c\x1f\xda"
+	"\xd1\x86\x48\x55\xce\x83\xee\x8e\x51\xc7\xde\x32\x12\x47\x7d\x46"
+	"\xb8\x35\xdf\x41\x02\x01\x30\x02\x01\x30\x02\x01\x30\x02\x01\x30"
+	"\x02\x01\x30",
+	.key_len = 804,
+	/*
+	 * m is SHA256 hash of following message:
+	 * "\x49\x41\xbe\x0a\x0c\xc9\xf6\x35\x51\xe4\x27\x56\x13\x71\x4b\xd0"
+	 * "\x36\x92\x84\x89\x1b\xf8\x56\x4a\x72\x61\x14\x69\x4f\x5e\x98\xa5"
+	 * "\x80\x5a\x37\x51\x1f\xd8\xf5\xb5\x63\xfc\xf4\xb1\xbb\x4d\x33\xa3"
+	 * "\x1e\xb9\x75\x8b\x9c\xda\x7e\x6d\x3a\x77\x85\xf7\xfc\x4e\xe7\x64"
+	 * "\x43\x10\x19\xa0\x59\xae\xe0\xad\x4b\xd3\xc4\x45\xf7\xb1\xc2\xc1"
+	 * "\x65\x01\x41\x39\x5b\x45\x47\xed\x2b\x51\xed\xe3\xd0\x09\x10\xd2"
+	 * "\x39\x6c\x4a\x3f\xe5\xd2\x20\xe6\xb0\x71\x7d\x5b\xed\x26\x60\xf1"
+	 * "\xb4\x73\xd1\xdb\x7d\xc4\x19\x91\xee\xf6\x32\x76\xf2\x19\x7d\xb7"
+	 */
+	.m =
+	"\x3e\xc8\xa1\x26\x20\x54\x44\x52\x48\x0d\xe5\x66\xf3\xb3\xf5\x04"
+	"\xbe\x10\xa8\x48\x94\x22\x2d\xdd\xba\x7a\xb4\x76\x8d\x79\x98\x89",
+	.m_size = 32,
+	.c =
+	"\xc7\xa3\x98\xeb\x43\xd1\x08\xc2\x3d\x78\x45\x04\x70\xc9\x01\xee"
+	"\xf8\x85\x37\x7c\x0b\xf9\x19\x70\x5c\x45\x7b\x2f\x3a\x0b\xb7\x8b"
+	"\xc4\x0d\x7b\x3a\x64\x0b\x0f\xdb\x78\xa9\x0b\xfd\x8d\x82\xa4\x86"
+	"\x39\xbf\x21\xb8\x84\xc4\xce\x9f\xc2\xe8\xb6\x61\x46\x17\xb9\x4e"
+	"\x0b\x57\x05\xb4\x4f\xf9\x9c\x93\x2d\x9b\xd5\x48\x1d\x80\x12\xef"
+	"\x3a\x77\x7f\xbc\xb5\x8e\x2b\x6b\x7c\xfc\x9f\x8c\x9d\xa2\xc4\x85"
+	"\xb0\x87\xe9\x17\x9b\xb6\x23\x62\xd2\xa9\x9f\x57\xe8\xf7\x04\x45"
+	"\x24\x3a\x45\xeb\xeb\x6a\x08\x8e\xaf\xc8\xa0\x84\xbc\x5d\x13\x38"
+	"\xf5\x17\x8c\xa3\x96\x9b\xa9\x38\x8d\xf0\x35\xad\x32\x8a\x72\x5b"
+	"\xdf\x21\xab\x4b\x0e\xa8\x29\xbb\x61\x54\xbf\x05\xdb\x84\x84\xde"
+	"\xdd\x16\x36\x31\xda\xf3\x42\x6d\x7a\x90\x22\x9b\x11\x29\xa6\xf8"
+	"\x30\x61\xda\xd3\x8b\x54\x1e\x42\xd1\x47\x1d\x6f\xd1\xcd\x42\x0b"
+	"\xd1\xe4\x15\x85\x7e\x08\xd6\x59\x64\x4c\x01\x34\x91\x92\x26\xe8"
+	"\xb0\x25\x8c\xf8\xf4\xfa\x8b\xc9\x31\x33\x76\x72\xfb\x64\x92\x9f"
+	"\xda\x62\x8d\xe1\x2a\x71\x91\x43\x40\x61\x3c\x5a\xbe\x86\xfc\x5b"
+	"\xe6\xf9\xa9\x16\x31\x1f\xaf\x25\x6d\xc2\x4a\x23\x6e\x63\x02\xa2",
+	.c_size = 256,
+	.siggen_sigver_test = true,
+	}
+};
+
 static const struct kpp_testvec dh_tv_template[] = {
 	{
 	.secret =
@@ -840,6 +939,50 @@ static const struct kpp_testvec ecdh_tv_template[] = {
 	.b_public_size = 64,
 	.expected_a_public_size = 64,
 	.expected_ss_size = 32
+	}, {
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x02\x00" /* type */
+	"\x08\x00" /* len */
+	"\x02\x00" /* curve_id */
+	"\x00\x00", /* key_size */
+#else
+	"\x00\x02" /* type */
+	"\x00\x08" /* len */
+	"\x00\x02" /* curve_id */
+	"\x00\x00", /* key_size */
+#endif
+	.b_secret =
+#ifdef __LITTLE_ENDIAN
+	"\x02\x00" /* type */
+	"\x28\x00" /* len */
+	"\x02\x00" /* curve_id */
+	"\x20\x00" /* key_size */
+#else
+	"\x00\x02" /* type */
+	"\x00\x28" /* len */
+	"\x00\x02" /* curve_id */
+	"\x00\x20" /* key_size */
+#endif
+	"\x24\xd1\x21\xeb\xe5\xcf\x2d\x83"
+	"\xf6\x62\x1b\x6e\x43\x84\x3a\xa3"
+	"\x8b\xe0\x86\xc3\x20\x19\xda\x92"
+	"\x50\x53\x03\xe1\xc0\xea\xb8\x82",
+	.b_public =
+	"\x1a\x7f\xeb\x52\x00\xbd\x3c\x31"
+	"\x7d\xb6\x70\xc1\x86\xa6\xc7\xc4"
+	"\x3b\xc5\x5f\x6c\x6f\x58\x3c\xf5"
+	"\xb6\x63\x82\x77\x33\x24\xa1\x5f"
+	"\x6a\xca\x43\x6f\xf7\x7e\xff\x02"
+	"\x37\x08\xcc\x40\x5e\x7a\xfd\x6a"
+	"\x6a\x02\x6e\x41\x87\x68\x38\x77"
+	"\xfa\xa9\x44\x43\x2d\xef\x09\xdf",
+	.secret_size = 8,
+	.b_secret_size = 40,
+	.b_public_size = 64,
+	.expected_a_public_size = 64,
+	.expected_ss_size = 32,
+	.genkey = true,
 	}
 };
 

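The new ecdh vector above carries an empty party-A secret (key_size 0x0000) together with .genkey = true, so it exercises in-kernel private-key generation rather than a fixed key. A minimal caller-side sketch of that request, assuming the existing crypto_ecdh_encode_key()/crypto_kpp_set_secret() helpers; buffer size and error handling here are illustrative only:

#include <crypto/ecdh.h>
#include <crypto/kpp.h>

/* Sketch: ask the ecdh implementation to generate the private key itself
 * by packing a parameter block with no key material (key_size = 0). */
static int ecdh_set_generated_key(struct crypto_kpp *tfm)
{
	struct ecdh p = {
		.curve_id = ECC_CURVE_NIST_P256,	/* curve_id 2, as in the vector */
		.key = NULL,
		.key_size = 0,				/* empty key => generate one */
	};
	char buf[64];					/* illustrative size */
	unsigned int len = crypto_ecdh_key_len(&p);
	int ret;

	if (len > sizeof(buf))
		return -EINVAL;

	ret = crypto_ecdh_encode_key(buf, len, &p);
	if (ret)
		return ret;

	return crypto_kpp_set_secret(tfm, buf, len);
}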
+ 42 - 0
drivers/char/hw_random/mtk-rng.c

@@ -25,6 +25,10 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+/* Runtime PM autosuspend timeout: */
+#define RNG_AUTOSUSPEND_TIMEOUT		100
 
 #define USEC_POLL			2
 #define TIMEOUT_POLL			20
@@ -90,6 +94,8 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 	struct mtk_rng *priv = to_mtk_rng(rng);
 	int retval = 0;
 
+	pm_runtime_get_sync((struct device *)priv->rng.priv);
+
 	while (max >= sizeof(u32)) {
 		if (!mtk_rng_wait_ready(rng, wait))
 			break;
@@ -100,6 +106,9 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 		max -= sizeof(u32);
 	}
 
+	pm_runtime_mark_last_busy((struct device *)priv->rng.priv);
+	pm_runtime_put_sync_autosuspend((struct device *)priv->rng.priv);
+
 	return retval || !wait ? retval : -EIO;
 }
 
@@ -120,9 +129,12 @@ static int mtk_rng_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv->rng.name = pdev->name;
+#ifndef CONFIG_PM
 	priv->rng.init = mtk_rng_init;
 	priv->rng.cleanup = mtk_rng_cleanup;
+#endif
 	priv->rng.read = mtk_rng_read;
+	priv->rng.priv = (unsigned long)&pdev->dev;
 
 	priv->clk = devm_clk_get(&pdev->dev, "rng");
 	if (IS_ERR(priv->clk)) {
@@ -142,11 +154,40 @@ static int mtk_rng_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	dev_set_drvdata(&pdev->dev, priv);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, RNG_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	dev_info(&pdev->dev, "registered RNG driver\n");
 
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int mtk_rng_runtime_suspend(struct device *dev)
+{
+	struct mtk_rng *priv = dev_get_drvdata(dev);
+
+	mtk_rng_cleanup(&priv->rng);
+
+	return 0;
+}
+
+static int mtk_rng_runtime_resume(struct device *dev)
+{
+	struct mtk_rng *priv = dev_get_drvdata(dev);
+
+	return mtk_rng_init(&priv->rng);
+}
+
+static UNIVERSAL_DEV_PM_OPS(mtk_rng_pm_ops, mtk_rng_runtime_suspend,
+			    mtk_rng_runtime_resume, NULL);
+#define MTK_RNG_PM_OPS (&mtk_rng_pm_ops)
+#else	/* CONFIG_PM */
+#define MTK_RNG_PM_OPS NULL
+#endif	/* CONFIG_PM */
+
 static const struct of_device_id mtk_rng_match[] = {
 	{ .compatible = "mediatek,mt7623-rng" },
 	{},
@@ -157,6 +198,7 @@ static struct platform_driver mtk_rng_driver = {
 	.probe          = mtk_rng_probe,
 	.driver = {
 		.name = MTK_RNG_DEV,
+		.pm = MTK_RNG_PM_OPS,
 		.of_match_table = mtk_rng_match,
 	},
 };

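For reference, the runtime-PM flow the mtk-rng hunks above establish: register access in the read path is bracketed by a synchronous get and an autosuspend-delayed put, while init/cleanup move into the runtime resume/suspend callbacks (which is why .init/.cleanup are only wired up when CONFIG_PM is not set). A compressed, hedged sketch of that bracket, not a drop-in replacement for the driver code:

#include <linux/pm_runtime.h>

/* Resume (or power up) the device before touching registers, then mark it
 * busy and let autosuspend power it back down RNG_AUTOSUSPEND_TIMEOUT ms
 * after the last read. */
static int rng_read_bracketed(struct device *dev)
{
	int ret;

	pm_runtime_get_sync(dev);		/* runtime-resume -> mtk_rng_init() */

	ret = 0;				/* ... drain the RNG FIFO here ... */

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_sync_autosuspend(dev);	/* idle -> mtk_rng_cleanup() later */

	return ret;
}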
+ 9 - 2
drivers/char/hw_random/omap3-rom-rng.c

@@ -53,7 +53,10 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count)
 
 	cancel_delayed_work_sync(&idle_work);
 	if (rng_idle) {
-		clk_prepare_enable(rng_clk);
+		r = clk_prepare_enable(rng_clk);
+		if (r)
+			return r;
+
 		r = omap3_rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
 		if (r != 0) {
 			clk_disable_unprepare(rng_clk);
@@ -88,6 +91,8 @@ static struct hwrng omap3_rom_rng_ops = {
 
 static int omap3_rom_rng_probe(struct platform_device *pdev)
 {
+	int ret = 0;
+
 	pr_info("initializing\n");
 
 	omap3_rom_rng_call = pdev->dev.platform_data;
@@ -104,7 +109,9 @@ static int omap3_rom_rng_probe(struct platform_device *pdev)
 	}
 
 	/* Leave the RNG in reset state. */
-	clk_prepare_enable(rng_clk);
+	ret = clk_prepare_enable(rng_clk);
+	if (ret)
+		return ret;
 	omap3_rom_rng_idle(0);
 
 	return hwrng_register(&omap3_rom_rng_ops);

+ 7 - 0
drivers/char/hw_random/timeriomem-rng.c

@@ -151,8 +151,15 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "missing period\n");
 			return -EINVAL;
 		}
+
+		if (!of_property_read_u32(pdev->dev.of_node,
+						"quality", &i))
+			priv->rng_ops.quality = i;
+		else
+			priv->rng_ops.quality = 0;
 	} else {
 		period = pdata->period;
+		priv->rng_ops.quality = pdata->quality;
 	}
 
 	priv->period = ns_to_ktime(period * NSEC_PER_USEC);

+ 30 - 0
drivers/crypto/Kconfig

@@ -327,6 +327,15 @@ config HW_RANDOM_PPC4XX
 	 This option provides the kernel-side support for the TRNG hardware
 	 found in the security function of some PowerPC 4xx SoCs.
 
+config CRYPTO_DEV_OMAP
+	tristate "Support for OMAP crypto HW accelerators"
+	depends on ARCH_OMAP2PLUS
+	help
+	  OMAP processors have various crypto HW accelerators. Select this if
+          you want to use the OMAP modules for any of the crypto algorithms.
+
+if CRYPTO_DEV_OMAP
+
 config CRYPTO_DEV_OMAP_SHAM
 	tristate "Support for OMAP MD5/SHA1/SHA2 hw accelerator"
 	depends on ARCH_OMAP2PLUS
@@ -348,6 +357,7 @@ config CRYPTO_DEV_OMAP_AES
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_CTR
+	select CRYPTO_AEAD
 	help
 	  OMAP processors have AES module accelerator. Select this if you
 	  want to use the OMAP module for AES algorithms.
@@ -364,6 +374,8 @@ config CRYPTO_DEV_OMAP_DES
 	  the ECB and CBC modes of operation are supported by the driver. Also
 	  accesses made on unaligned boundaries are supported.
 
+endif # CRYPTO_DEV_OMAP
+
 config CRYPTO_DEV_PICOXCELL
 	tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
 	depends on (ARCH_PICOXCELL || COMPILE_TEST) && HAVE_CLK
@@ -542,6 +554,7 @@ config CRYPTO_DEV_MXS_DCP
 
 source "drivers/crypto/qat/Kconfig"
 source "drivers/crypto/cavium/cpt/Kconfig"
+source "drivers/crypto/cavium/nitrox/Kconfig"
 
 config CRYPTO_DEV_CAVIUM_ZIP
 	tristate "Cavium ZIP driver"
@@ -656,4 +669,21 @@ config CRYPTO_DEV_BCM_SPU
 
 source "drivers/crypto/stm32/Kconfig"
 
+config CRYPTO_DEV_SAFEXCEL
+	tristate "Inside Secure's SafeXcel cryptographic engine driver"
+	depends on HAS_DMA && OF
+	depends on (ARM64 && ARCH_MVEBU) || (COMPILE_TEST && 64BIT)
+	select CRYPTO_AES
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_HASH
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	help
+	  This driver interfaces with the SafeXcel EIP-197 cryptographic engine
+	  designed by Inside Secure. Select this if you want to use CBC/ECB
+	  chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash
+	  algorithms.
+
 endif # CRYPTO_HW

+ 5 - 1
drivers/crypto/Makefile

@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CAVIUM_ZIP) += cavium/
 obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
 obj-$(CONFIG_CRYPTO_DEV_CPT) += cavium/cpt/
+obj-$(CONFIG_CRYPTO_DEV_NITROX) += cavium/nitrox/
 obj-$(CONFIG_CRYPTO_DEV_EXYNOS_RNG) += exynos-rng.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
@@ -20,7 +21,9 @@ obj-$(CONFIG_CRYPTO_DEV_MXC_SCC) += mxc-scc.o
 obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o
 n2_crypto-y := n2_core.o n2_asm.o
 obj-$(CONFIG_CRYPTO_DEV_NX) += nx/
-obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
+obj-$(CONFIG_CRYPTO_DEV_OMAP) += omap-crypto.o
+obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes-driver.o
+omap-aes-driver-objs := omap-aes.o omap-aes-gcm.o
 obj-$(CONFIG_CRYPTO_DEV_OMAP_DES) += omap-des.o
 obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
@@ -39,3 +42,4 @@ obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
+obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/

+ 1 - 0
drivers/crypto/amcc/crypto4xx_core.c

@@ -1179,6 +1179,7 @@ static int crypto4xx_probe(struct platform_device *ofdev)
 	dev_set_drvdata(dev, core_dev);
 	core_dev->ofdev = ofdev;
 	core_dev->dev = kzalloc(sizeof(struct crypto4xx_device), GFP_KERNEL);
+	rc = -ENOMEM;
 	if (!core_dev->dev)
 		goto err_alloc_dev;
 

+ 4 - 3
drivers/crypto/bcm/cipher.c

@@ -36,6 +36,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/aes.h>
 #include <crypto/des.h>
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
 #include <crypto/authenc.h>
@@ -2510,8 +2511,8 @@ static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,
 		memcpy(ctx->opad, ctx->ipad, blocksize);
 
 		for (index = 0; index < blocksize; index++) {
-			ctx->ipad[index] ^= 0x36;
-			ctx->opad[index] ^= 0x5c;
+			ctx->ipad[index] ^= HMAC_IPAD_VALUE;
+			ctx->opad[index] ^= HMAC_OPAD_VALUE;
 		}
 
 		flow_dump("  ipad: ", ctx->ipad, blocksize);
@@ -2638,7 +2639,7 @@ static int aead_need_fallback(struct aead_request *req)
 	    (spu->spu_type == SPU_TYPE_SPUM) &&
 	    (ctx->digestsize != 8) && (ctx->digestsize != 12) &&
 	    (ctx->digestsize != 16)) {
-		flow_log("%s() AES CCM needs fallbck for digest size %d\n",
+		flow_log("%s() AES CCM needs fallback for digest size %d\n",
 			 __func__, ctx->digestsize);
 		return 1;
 	}

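The bcm/cipher.c hunk above replaces the magic 0x36/0x5c pad bytes with HMAC_IPAD_VALUE/HMAC_OPAD_VALUE from <crypto/hmac.h>. For context, a hedged sketch of the RFC 2104 pad derivation those constants encode; the helper name is hypothetical and the sketch assumes the key has already been shortened to at most one block:

#include <linux/string.h>
#include <linux/types.h>
#include <crypto/hmac.h>	/* HMAC_IPAD_VALUE (0x36), HMAC_OPAD_VALUE (0x5c) */

/* ipad/opad start as the zero-padded key and are XORed byte-wise. */
static void hmac_derive_pads(u8 *ipad, u8 *opad, const u8 *key,
			     unsigned int keylen, unsigned int blocksize)
{
	unsigned int i;

	memset(ipad, 0, blocksize);
	memcpy(ipad, key, keylen);		/* assumes keylen <= blocksize */
	memcpy(opad, ipad, blocksize);

	for (i = 0; i < blocksize; i++) {
		ipad[i] ^= HMAC_IPAD_VALUE;
		opad[i] ^= HMAC_OPAD_VALUE;
	}
}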
+ 4 - 6
drivers/crypto/caam/caamalg.c

@@ -1187,8 +1187,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct aead_edesc *edesc;
 	int sec4_sg_index, sec4_sg_len, sec4_sg_bytes;
@@ -1475,8 +1475,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct ablkcipher_edesc *edesc;
@@ -1681,8 +1680,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags &  CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
 	struct ablkcipher_edesc *edesc;

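The caamalg.c hunks above (and the caamalg_qi.c, caamhash.c and caampkc.c hunks that follow) all change the same allocation-flag decision: CRYPTO_TFM_REQ_MAY_BACKLOG no longer counts as permission to sleep, only CRYPTO_TFM_REQ_MAY_SLEEP does. The resulting idiom, quoted here in one place for reference:

/* Only an explicit MAY_SLEEP request may use a sleeping allocation; a
 * request that is merely backloggable is still handled atomically. */
gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
	      GFP_KERNEL : GFP_ATOMIC;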
+ 4 - 6
drivers/crypto/caam/caamalg_qi.c

@@ -555,8 +555,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 typeof(*alg), aead);
 	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct aead_edesc *edesc;
 	dma_addr_t qm_sg_dma, iv_dma = 0;
@@ -808,8 +808,7 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct ablkcipher_edesc *edesc;
@@ -953,8 +952,7 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *qidev = ctx->qidev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-					  CRYPTO_TFM_REQ_MAY_SLEEP)) ?
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
 	struct ablkcipher_edesc *edesc;

+ 16 - 16
drivers/crypto/caam/caamhash.c

@@ -719,8 +719,8 @@ static int ahash_update_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = current_buf(state);
 	int *buflen = current_buflen(state);
 	u8 *next_buf = alt_buf(state);
@@ -849,8 +849,8 @@ static int ahash_final_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int buflen = *current_buflen(state);
 	u32 *desc;
 	int sec4_sg_bytes, sec4_sg_src_index;
@@ -926,8 +926,8 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int buflen = *current_buflen(state);
 	u32 *desc;
 	int sec4_sg_src_index;
@@ -1013,8 +1013,8 @@ static int ahash_digest(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u32 *desc;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int src_nents, mapped_nents;
@@ -1093,8 +1093,8 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = current_buf(state);
 	int buflen = *current_buflen(state);
 	u32 *desc;
@@ -1154,8 +1154,8 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = current_buf(state);
 	int *buflen = current_buflen(state);
 	u8 *next_buf = alt_buf(state);
@@ -1280,8 +1280,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int buflen = *current_buflen(state);
 	u32 *desc;
 	int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
@@ -1370,8 +1370,8 @@ static int ahash_update_first(struct ahash_request *req)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	u8 *next_buf = alt_buf(state);
 	int *next_buflen = alt_buflen(state);
 	int to_hash;

+ 446 - 26
drivers/crypto/caam/caampkc.c

@@ -18,6 +18,10 @@
 #define DESC_RSA_PUB_LEN	(2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb))
 #define DESC_RSA_PRIV_F1_LEN	(2 * CAAM_CMD_SZ + \
 				 sizeof(struct rsa_priv_f1_pdb))
+#define DESC_RSA_PRIV_F2_LEN	(2 * CAAM_CMD_SZ + \
+				 sizeof(struct rsa_priv_f2_pdb))
+#define DESC_RSA_PRIV_F3_LEN	(2 * CAAM_CMD_SZ + \
+				 sizeof(struct rsa_priv_f3_pdb))
 
 static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc,
 			 struct akcipher_request *req)
@@ -54,6 +58,42 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc,
 	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
 }
 
+static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
+			      struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+}
+
+static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
+			      struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+}
+
 /* RSA Job Completion handler */
 static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context)
 {
@@ -90,6 +130,42 @@ static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err,
 	akcipher_request_complete(req, err);
 }
 
+static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err,
+			     void *context)
+{
+	struct akcipher_request *req = context;
+	struct rsa_edesc *edesc;
+
+	if (err)
+		caam_jr_strstatus(dev, err);
+
+	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+
+	rsa_priv_f2_unmap(dev, edesc, req);
+	rsa_io_unmap(dev, edesc, req);
+	kfree(edesc);
+
+	akcipher_request_complete(req, err);
+}
+
+static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
+			     void *context)
+{
+	struct akcipher_request *req = context;
+	struct rsa_edesc *edesc;
+
+	if (err)
+		caam_jr_strstatus(dev, err);
+
+	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+
+	rsa_priv_f3_unmap(dev, edesc, req);
+	rsa_io_unmap(dev, edesc, req);
+	kfree(edesc);
+
+	akcipher_request_complete(req, err);
+}
+
 static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 					 size_t desclen)
 {
@@ -97,8 +173,8 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct device *dev = ctx->dev;
 	struct rsa_edesc *edesc;
-	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
+	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		       GFP_KERNEL : GFP_ATOMIC;
 	int sgc;
 	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 	int src_nents, dst_nents;
@@ -258,6 +334,172 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req,
 	return 0;
 }
 
+static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
+			       struct rsa_edesc *edesc)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct device *dev = ctx->dev;
+	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
+	int sec4_sg_index = 0;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->d_dma)) {
+		dev_err(dev, "Unable to map RSA private exponent memory\n");
+		return -ENOMEM;
+	}
+
+	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->p_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor p memory\n");
+		goto unmap_d;
+	}
+
+	pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->q_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor q memory\n");
+		goto unmap_p;
+	}
+
+	pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp1_dma)) {
+		dev_err(dev, "Unable to map RSA tmp1 memory\n");
+		goto unmap_q;
+	}
+
+	pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp2_dma)) {
+		dev_err(dev, "Unable to map RSA tmp2 memory\n");
+		goto unmap_tmp1;
+	}
+
+	if (edesc->src_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
+		pdb->g_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+	} else {
+		pdb->g_dma = sg_dma_address(req->src);
+	}
+
+	if (edesc->dst_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
+		pdb->f_dma = edesc->sec4_sg_dma +
+			     sec4_sg_index * sizeof(struct sec4_sg_entry);
+	} else {
+		pdb->f_dma = sg_dma_address(req->dst);
+	}
+
+	pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz;
+	pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
+
+	return 0;
+
+unmap_tmp1:
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+unmap_q:
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+unmap_p:
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+unmap_d:
+	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
+static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
+			       struct rsa_edesc *edesc)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct device *dev = ctx->dev;
+	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
+	int sec4_sg_index = 0;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->p_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor p memory\n");
+		return -ENOMEM;
+	}
+
+	pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->q_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor q memory\n");
+		goto unmap_p;
+	}
+
+	pdb->dp_dma = dma_map_single(dev, key->dp, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->dp_dma)) {
+		dev_err(dev, "Unable to map RSA exponent dp memory\n");
+		goto unmap_q;
+	}
+
+	pdb->dq_dma = dma_map_single(dev, key->dq, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->dq_dma)) {
+		dev_err(dev, "Unable to map RSA exponent dq memory\n");
+		goto unmap_dp;
+	}
+
+	pdb->c_dma = dma_map_single(dev, key->qinv, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->c_dma)) {
+		dev_err(dev, "Unable to map RSA CRT coefficient qinv memory\n");
+		goto unmap_dq;
+	}
+
+	pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp1_dma)) {
+		dev_err(dev, "Unable to map RSA tmp1 memory\n");
+		goto unmap_qinv;
+	}
+
+	pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp2_dma)) {
+		dev_err(dev, "Unable to map RSA tmp2 memory\n");
+		goto unmap_tmp1;
+	}
+
+	if (edesc->src_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
+		pdb->g_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+	} else {
+		pdb->g_dma = sg_dma_address(req->src);
+	}
+
+	if (edesc->dst_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
+		pdb->f_dma = edesc->sec4_sg_dma +
+			     sec4_sg_index * sizeof(struct sec4_sg_entry);
+	} else {
+		pdb->f_dma = sg_dma_address(req->dst);
+	}
+
+	pdb->sgf |= key->n_sz;
+	pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
+
+	return 0;
+
+unmap_tmp1:
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+unmap_qinv:
+	dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
+unmap_dq:
+	dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
+unmap_dp:
+	dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
+unmap_q:
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+unmap_p:
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
 static int caam_rsa_enc(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
@@ -301,24 +543,14 @@ init_fail:
 	return ret;
 }
 
-static int caam_rsa_dec(struct akcipher_request *req)
+static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-	struct caam_rsa_key *key = &ctx->key;
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
 
-	if (unlikely(!key->n || !key->d))
-		return -EINVAL;
-
-	if (req->dst_len < key->n_sz) {
-		req->dst_len = key->n_sz;
-		dev_err(jrdev, "Output buffer length less than parameter n\n");
-		return -EOVERFLOW;
-	}
-
 	/* Allocate extended descriptor */
 	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN);
 	if (IS_ERR(edesc))
@@ -344,17 +576,147 @@ init_fail:
 	return ret;
 }
 
+static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct device *jrdev = ctx->dev;
+	struct rsa_edesc *edesc;
+	int ret;
+
+	/* Allocate extended descriptor */
+	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F2_LEN);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Set RSA Decrypt Protocol Data Block - Private Key Form #2 */
+	ret = set_rsa_priv_f2_pdb(req, edesc);
+	if (ret)
+		goto init_fail;
+
+	/* Initialize Job Descriptor */
+	init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2);
+
+	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req);
+	if (!ret)
+		return -EINPROGRESS;
+
+	rsa_priv_f2_unmap(jrdev, edesc, req);
+
+init_fail:
+	rsa_io_unmap(jrdev, edesc, req);
+	kfree(edesc);
+	return ret;
+}
+
+static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct device *jrdev = ctx->dev;
+	struct rsa_edesc *edesc;
+	int ret;
+
+	/* Allocate extended descriptor */
+	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F3_LEN);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Set RSA Decrypt Protocol Data Block - Private Key Form #3 */
+	ret = set_rsa_priv_f3_pdb(req, edesc);
+	if (ret)
+		goto init_fail;
+
+	/* Initialize Job Descriptor */
+	init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3);
+
+	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req);
+	if (!ret)
+		return -EINPROGRESS;
+
+	rsa_priv_f3_unmap(jrdev, edesc, req);
+
+init_fail:
+	rsa_io_unmap(jrdev, edesc, req);
+	kfree(edesc);
+	return ret;
+}
+
+static int caam_rsa_dec(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	int ret;
+
+	if (unlikely(!key->n || !key->d))
+		return -EINVAL;
+
+	if (req->dst_len < key->n_sz) {
+		req->dst_len = key->n_sz;
+		dev_err(ctx->dev, "Output buffer length less than parameter n\n");
+		return -EOVERFLOW;
+	}
+
+	if (key->priv_form == FORM3)
+		ret = caam_rsa_dec_priv_f3(req);
+	else if (key->priv_form == FORM2)
+		ret = caam_rsa_dec_priv_f2(req);
+	else
+		ret = caam_rsa_dec_priv_f1(req);
+
+	return ret;
+}
+
 static void caam_rsa_free_key(struct caam_rsa_key *key)
 {
 	kzfree(key->d);
+	kzfree(key->p);
+	kzfree(key->q);
+	kzfree(key->dp);
+	kzfree(key->dq);
+	kzfree(key->qinv);
+	kzfree(key->tmp1);
+	kzfree(key->tmp2);
 	kfree(key->e);
 	kfree(key->n);
-	key->d = NULL;
-	key->e = NULL;
-	key->n = NULL;
-	key->d_sz = 0;
-	key->e_sz = 0;
-	key->n_sz = 0;
+	memset(key, 0, sizeof(*key));
+}
+
+static void caam_rsa_drop_leading_zeros(const u8 **ptr, size_t *nbytes)
+{
+	while (!**ptr && *nbytes) {
+		(*ptr)++;
+		(*nbytes)--;
+	}
+}
+
+/**
+ * caam_read_rsa_crt - Used for reading dP, dQ, qInv CRT members.
+ * dP, dQ and qInv could decode to less than corresponding p, q length, as the
+ * BER-encoding requires that the minimum number of bytes be used to encode the
+ * integer. dP, dQ, qInv decoded values have to be zero-padded to appropriate
+ * length.
+ *
+ * @ptr   : pointer to {dP, dQ, qInv} CRT member
+ * @nbytes: length in bytes of {dP, dQ, qInv} CRT member
+ * @dstlen: length in bytes of corresponding p or q prime factor
+ */
+static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
+{
+	u8 *dst;
+
+	caam_rsa_drop_leading_zeros(&ptr, &nbytes);
+	if (!nbytes)
+		return NULL;
+
+	dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL);
+	if (!dst)
+		return NULL;
+
+	memcpy(dst + (dstlen - nbytes), ptr, nbytes);
+
+	return dst;
 }
 
 /**
@@ -370,10 +732,9 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
 {
 	u8 *val;
 
-	while (!*buf && *nbytes) {
-		buf++;
-		(*nbytes)--;
-	}
+	caam_rsa_drop_leading_zeros(&buf, nbytes);
+	if (!*nbytes)
+		return NULL;
 
 	val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL);
 	if (!val)
@@ -437,6 +798,64 @@ err:
 	return -ENOMEM;
 }
 
+static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx,
+				       struct rsa_key *raw_key)
+{
+	struct caam_rsa_key *rsa_key = &ctx->key;
+	size_t p_sz = raw_key->p_sz;
+	size_t q_sz = raw_key->q_sz;
+
+	rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz);
+	if (!rsa_key->p)
+		return;
+	rsa_key->p_sz = p_sz;
+
+	rsa_key->q = caam_read_raw_data(raw_key->q, &q_sz);
+	if (!rsa_key->q)
+		goto free_p;
+	rsa_key->q_sz = q_sz;
+
+	rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL);
+	if (!rsa_key->tmp1)
+		goto free_q;
+
+	rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL);
+	if (!rsa_key->tmp2)
+		goto free_tmp1;
+
+	rsa_key->priv_form = FORM2;
+
+	rsa_key->dp = caam_read_rsa_crt(raw_key->dp, raw_key->dp_sz, p_sz);
+	if (!rsa_key->dp)
+		goto free_tmp2;
+
+	rsa_key->dq = caam_read_rsa_crt(raw_key->dq, raw_key->dq_sz, q_sz);
+	if (!rsa_key->dq)
+		goto free_dp;
+
+	rsa_key->qinv = caam_read_rsa_crt(raw_key->qinv, raw_key->qinv_sz,
+					  q_sz);
+	if (!rsa_key->qinv)
+		goto free_dq;
+
+	rsa_key->priv_form = FORM3;
+
+	return;
+
+free_dq:
+	kzfree(rsa_key->dq);
+free_dp:
+	kzfree(rsa_key->dp);
+free_tmp2:
+	kzfree(rsa_key->tmp2);
+free_tmp1:
+	kzfree(rsa_key->tmp1);
+free_q:
+	kzfree(rsa_key->q);
+free_p:
+	kzfree(rsa_key->p);
+}
+
 static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 				 unsigned int keylen)
 {
@@ -483,6 +902,8 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 	memcpy(rsa_key->d, raw_key.d, raw_key.d_sz);
 	memcpy(rsa_key->e, raw_key.e, raw_key.e_sz);
 
+	caam_rsa_set_priv_key_form(ctx, &raw_key);
+
 	return 0;
 
 err:
@@ -490,12 +911,11 @@ err:
 	return -ENOMEM;
 }
 
-static int caam_rsa_max_size(struct crypto_akcipher *tfm)
+static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm)
 {
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-	struct caam_rsa_key *key = &ctx->key;
 
-	return (key->n) ? key->n_sz : -EINVAL;
+	return ctx->key.n_sz;
 }
 
 /* Per session pkc's driver context creation function */

+ 58 - 0
drivers/crypto/caam/caampkc.h

@@ -12,22 +12,76 @@
 #include "compat.h"
 #include "pdb.h"
 
+/**
+ * caam_priv_key_form - CAAM RSA private key representation
+ * CAAM RSA private key may have either of three forms.
+ *
+ * 1. The first representation consists of the pair (n, d), where the
+ *    components have the following meanings:
+ *        n      the RSA modulus
+ *        d      the RSA private exponent
+ *
+ * 2. The second representation consists of the triplet (p, q, d), where the
+ *    components have the following meanings:
+ *        p      the first prime factor of the RSA modulus n
+ *        q      the second prime factor of the RSA modulus n
+ *        d      the RSA private exponent
+ *
+ * 3. The third representation consists of the quintuple (p, q, dP, dQ, qInv),
+ *    where the components have the following meanings:
+ *        p      the first prime factor of the RSA modulus n
+ *        q      the second prime factor of the RSA modulus n
+ *        dP     the first factor's CRT exponent
+ *        dQ     the second factor's CRT exponent
+ *        qInv   the (first) CRT coefficient
+ *
+ * The benefit of using the third or the second key form is lower computational
+ * cost for the decryption and signature operations.
+ */
+enum caam_priv_key_form {
+	FORM1,
+	FORM2,
+	FORM3
+};
+
 /**
  * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone.
  * @n           : RSA modulus raw byte stream
  * @e           : RSA public exponent raw byte stream
  * @d           : RSA private exponent raw byte stream
+ * @p           : RSA prime factor p of RSA modulus n
+ * @q           : RSA prime factor q of RSA modulus n
+ * @dp          : RSA CRT exponent of p
+ * @dq          : RSA CRT exponent of q
+ * @qinv        : RSA CRT coefficient
+ * @tmp1        : CAAM uses this temporary buffer as internal state buffer.
+ *                It is assumed to be as long as p.
+ * @tmp2        : CAAM uses this temporary buffer as internal state buffer.
+ *                It is assumed to be as long as q.
  * @n_sz        : length in bytes of RSA modulus n
  * @e_sz        : length in bytes of RSA public exponent
  * @d_sz        : length in bytes of RSA private exponent
+ * @p_sz        : length in bytes of RSA prime factor p of RSA modulus n
+ * @q_sz        : length in bytes of RSA prime factor q of RSA modulus n
+ * @priv_form   : CAAM RSA private key representation
  */
 struct caam_rsa_key {
 	u8 *n;
 	u8 *e;
 	u8 *d;
+	u8 *p;
+	u8 *q;
+	u8 *dp;
+	u8 *dq;
+	u8 *qinv;
+	u8 *tmp1;
+	u8 *tmp2;
 	size_t n_sz;
 	size_t e_sz;
 	size_t d_sz;
+	size_t p_sz;
+	size_t q_sz;
+	enum caam_priv_key_form priv_form;
 };
 
 /**
@@ -59,6 +113,8 @@ struct rsa_edesc {
 	union {
 		struct rsa_pub_pdb pub;
 		struct rsa_priv_f1_pdb priv_f1;
+		struct rsa_priv_f2_pdb priv_f2;
+		struct rsa_priv_f3_pdb priv_f3;
 	} pdb;
 	u32 hw_desc[];
 };
@@ -66,5 +122,7 @@ struct rsa_edesc {
 /* Descriptor construction primitives. */
 void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb);
 void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb);
+void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb);
+void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb);
 
 #endif

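The caampkc.h comment above describes the three private-key representations the driver now accepts. For reference only (this notation is not taken from the patch), the computational saving of form 3 comes from the standard RSA-CRT identity, which trades one full-size exponentiation for two half-size ones:

\[
m_1 = c^{d_P} \bmod p, \qquad m_2 = c^{d_Q} \bmod q,
\]
\[
h = q_{\mathrm{inv}} \cdot (m_1 - m_2) \bmod p, \qquad m = m_2 + h \cdot q,
\]
where \(d_P = d \bmod (p-1)\), \(d_Q = d \bmod (q-1)\) and \(q_{\mathrm{inv}} = q^{-1} \bmod p\). Form 2 (p, q, d) gives a smaller saving by letting the hardware work modulo the two primes without precomputed CRT exponents.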
+ 1 - 1
drivers/crypto/caam/jr.c

@@ -536,7 +536,7 @@ static int caam_jr_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id caam_jr_match[] = {
+static const struct of_device_id caam_jr_match[] = {
 	{
 		.compatible = "fsl,sec-v4.0-job-ring",
 	},

+ 62 - 0
drivers/crypto/caam/pdb.h

@@ -483,6 +483,8 @@ struct dsa_verify_pdb {
 #define RSA_PDB_E_MASK          (0xFFF << RSA_PDB_E_SHIFT)
 #define RSA_PDB_D_SHIFT         12
 #define RSA_PDB_D_MASK          (0xFFF << RSA_PDB_D_SHIFT)
+#define RSA_PDB_Q_SHIFT         12
+#define RSA_PDB_Q_MASK          (0xFFF << RSA_PDB_Q_SHIFT)
 
 #define RSA_PDB_SGF_F           (0x8 << RSA_PDB_SGF_SHIFT)
 #define RSA_PDB_SGF_G           (0x4 << RSA_PDB_SGF_SHIFT)
@@ -490,6 +492,8 @@ struct dsa_verify_pdb {
 #define RSA_PRIV_PDB_SGF_G      (0x8 << RSA_PDB_SGF_SHIFT)
 
 #define RSA_PRIV_KEY_FRM_1      0
+#define RSA_PRIV_KEY_FRM_2      1
+#define RSA_PRIV_KEY_FRM_3      2
 
 /**
  * RSA Encrypt Protocol Data Block
@@ -525,4 +529,62 @@ struct rsa_priv_f1_pdb {
 	dma_addr_t	d_dma;
 } __packed;
 
+/**
+ * RSA Decrypt PDB - Private Key Form #2
+ * @sgf     : scatter-gather field
+ * @g_dma   : dma address of encrypted input data
+ * @f_dma   : dma address of output data
+ * @d_dma   : dma address of RSA private exponent
+ * @p_dma   : dma address of RSA prime factor p of RSA modulus n
+ * @q_dma   : dma address of RSA prime factor q of RSA modulus n
+ * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as p.
+ * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as q.
+ * @p_q_len : length in bytes of first two prime factors of the RSA modulus n
+ */
+struct rsa_priv_f2_pdb {
+	u32		sgf;
+	dma_addr_t	g_dma;
+	dma_addr_t	f_dma;
+	dma_addr_t	d_dma;
+	dma_addr_t	p_dma;
+	dma_addr_t	q_dma;
+	dma_addr_t	tmp1_dma;
+	dma_addr_t	tmp2_dma;
+	u32		p_q_len;
+} __packed;
+
+/**
+ * RSA Decrypt PDB - Private Key Form #3
+ * This is the RSA Chinese Remainder Theorem (CRT) form for two prime factors of
+ * the RSA modulus.
+ * @sgf     : scatter-gather field
+ * @g_dma   : dma address of encrypted input data
+ * @f_dma   : dma address of output data
+ * @c_dma   : dma address of RSA CRT coefficient
+ * @p_dma   : dma address of RSA prime factor p of RSA modulus n
+ * @q_dma   : dma address of RSA prime factor q of RSA modulus n
+ * @dp_dma  : dma address of RSA CRT exponent of RSA prime factor p
+ * @dq_dma  : dma address of RSA CRT exponent of RSA prime factor q
+ * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as p.
+ * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as q.
+ * @p_q_len : length in bytes of first two prime factors of the RSA modulus n
+ */
+struct rsa_priv_f3_pdb {
+	u32		sgf;
+	dma_addr_t	g_dma;
+	dma_addr_t	f_dma;
+	dma_addr_t	c_dma;
+	dma_addr_t	p_dma;
+	dma_addr_t	q_dma;
+	dma_addr_t	dp_dma;
+	dma_addr_t	dq_dma;
+	dma_addr_t	tmp1_dma;
+	dma_addr_t	tmp2_dma;
+	u32		p_q_len;
+} __packed;
+
 #endif

+ 36 - 0
drivers/crypto/caam/pkc_desc.c

@@ -34,3 +34,39 @@ void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb)
 	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
 			 RSA_PRIV_KEY_FRM_1);
 }
+
+/* Descriptor for RSA Private operation - Private Key Form #2 */
+void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb)
+{
+	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	append_cmd(desc, pdb->sgf);
+	append_ptr(desc, pdb->g_dma);
+	append_ptr(desc, pdb->f_dma);
+	append_ptr(desc, pdb->d_dma);
+	append_ptr(desc, pdb->p_dma);
+	append_ptr(desc, pdb->q_dma);
+	append_ptr(desc, pdb->tmp1_dma);
+	append_ptr(desc, pdb->tmp2_dma);
+	append_cmd(desc, pdb->p_q_len);
+	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
+			 RSA_PRIV_KEY_FRM_2);
+}
+
+/* Descriptor for RSA Private operation - Private Key Form #3 */
+void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb)
+{
+	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	append_cmd(desc, pdb->sgf);
+	append_ptr(desc, pdb->g_dma);
+	append_ptr(desc, pdb->f_dma);
+	append_ptr(desc, pdb->c_dma);
+	append_ptr(desc, pdb->p_dma);
+	append_ptr(desc, pdb->q_dma);
+	append_ptr(desc, pdb->dp_dma);
+	append_ptr(desc, pdb->dq_dma);
+	append_ptr(desc, pdb->tmp1_dma);
+	append_ptr(desc, pdb->tmp2_dma);
+	append_cmd(desc, pdb->p_q_len);
+	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
+			 RSA_PRIV_KEY_FRM_3);
+}

+ 157 - 77
drivers/crypto/cavium/cpt/cptvf_algs.c

@@ -98,7 +98,6 @@ static inline void update_output_data(struct cpt_request_info *req_info,
 }
 
 static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
-				 u32 cipher_type, u32 aes_key_type,
 				 u32 *argcnt)
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
@@ -124,11 +123,11 @@ static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
 	req_info->req.param1 = req->nbytes; /* Encryption Data length */
 	req_info->req.param2 = 0; /*Auth data length */
 
-	fctx->enc.enc_ctrl.e.enc_cipher = cipher_type;
-	fctx->enc.enc_ctrl.e.aes_key = aes_key_type;
+	fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type;
+	fctx->enc.enc_ctrl.e.aes_key = ctx->key_type;
 	fctx->enc.enc_ctrl.e.iv_source = FROM_DPTR;
 
-	if (cipher_type == AES_XTS)
+	if (ctx->cipher_type == AES_XTS)
 		memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2);
 	else
 		memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len);
@@ -154,14 +153,13 @@ static inline u32 create_ctx_hdr(struct ablkcipher_request *req, u32 enc,
 }
 
 static inline u32 create_input_list(struct ablkcipher_request  *req, u32 enc,
-				    u32 cipher_type, u32 aes_key_type,
 				    u32 enc_iv_len)
 {
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u32 argcnt =  0;
 
-	create_ctx_hdr(req, enc, cipher_type, aes_key_type, &argcnt);
+	create_ctx_hdr(req, enc, &argcnt);
 	update_input_iv(req_info, req->info, enc_iv_len, &argcnt);
 	update_input_data(req_info, req->src, req->nbytes, &argcnt);
 	req_info->incnt = argcnt;
@@ -177,7 +175,6 @@ static inline void store_cb_info(struct ablkcipher_request *req,
 }
 
 static inline void create_output_list(struct ablkcipher_request *req,
-				      u32 cipher_type,
 				      u32 enc_iv_len)
 {
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
@@ -197,12 +194,9 @@ static inline void create_output_list(struct ablkcipher_request *req,
 	req_info->outcnt = argcnt;
 }
 
-static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc,
-			      u32 cipher_type)
+static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc)
 {
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct cvm_enc_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 key_type = AES_128_BIT;
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
 	struct cvm_req_ctx *rctx = ablkcipher_request_ctx(req);
 	u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm);
 	u32 enc_iv_len = crypto_ablkcipher_ivsize(tfm);
 	struct fc_context *fctx = &rctx->fctx;
 	struct fc_context *fctx = &rctx->fctx;
@@ -210,36 +204,10 @@ static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc,
 	void *cdev = NULL;
 	int status;
 
-	switch (ctx->key_len) {
-	case 16:
-		key_type = AES_128_BIT;
-		break;
-	case 24:
-		key_type = AES_192_BIT;
-		break;
-	case 32:
-		if (cipher_type == AES_XTS)
-			key_type = AES_128_BIT;
-		else
-			key_type = AES_256_BIT;
-		break;
-	case 64:
-		if (cipher_type == AES_XTS)
-			key_type = AES_256_BIT;
-		else
-			return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (cipher_type == DES3_CBC)
-		key_type = 0;
-
 	memset(req_info, 0, sizeof(struct cpt_request_info));
 	memset(fctx, 0, sizeof(struct fc_context));
-	create_input_list(req, enc, cipher_type, key_type, enc_iv_len);
-	create_output_list(req, cipher_type, enc_iv_len);
+	create_input_list(req, enc, enc_iv_len);
+	create_output_list(req, enc_iv_len);
 	store_cb_info(req, req_info);
 	cdev = dev_handle.cdev[smp_processor_id()];
 	status = cptvf_do_request(cdev, req_info);
@@ -254,34 +222,14 @@ static inline int cvm_enc_dec(struct ablkcipher_request *req, u32 enc,
 		return -EINPROGRESS;
 }
 
-int cvm_des3_encrypt_cbc(struct ablkcipher_request *req)
+int cvm_encrypt(struct ablkcipher_request *req)
 {
-	return cvm_enc_dec(req, true, DES3_CBC);
+	return cvm_enc_dec(req, true);
 }
 
-int cvm_des3_decrypt_cbc(struct ablkcipher_request *req)
+int cvm_decrypt(struct ablkcipher_request *req)
 {
-	return cvm_enc_dec(req, false, DES3_CBC);
-}
-
-int cvm_aes_encrypt_xts(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, true, AES_XTS);
-}
-
-int cvm_aes_decrypt_xts(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, false, AES_XTS);
-}
-
-int cvm_aes_encrypt_cbc(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, true, AES_CBC);
-}
-
-int cvm_aes_decrypt_cbc(struct ablkcipher_request *req)
-{
-	return cvm_enc_dec(req, false, AES_CBC);
+	return cvm_enc_dec(req, false);
 }
 
 int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
@@ -299,24 +247,93 @@ int cvm_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	ctx->key_len = keylen;
 	memcpy(ctx->enc_key, key1, keylen / 2);
 	memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2);
+	ctx->cipher_type = AES_XTS;
+	switch (ctx->key_len) {
+	case 32:
+		ctx->key_type = AES_128_BIT;
+		break;
+	case 64:
+		ctx->key_type = AES_256_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	return 0;
 }
 
-int cvm_enc_dec_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
-		       u32 keylen)
+static int cvm_validate_keylen(struct cvm_enc_ctx *ctx, u32 keylen)
+{
+	if ((keylen == 16) || (keylen == 24) || (keylen == 32)) {
+		ctx->key_len = keylen;
+		switch (ctx->key_len) {
+		case 16:
+			ctx->key_type = AES_128_BIT;
+			break;
+		case 24:
+			ctx->key_type = AES_192_BIT;
+			break;
+		case 32:
+			ctx->key_type = AES_256_BIT;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (ctx->cipher_type == DES3_CBC)
+			ctx->key_type = 0;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int cvm_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+		      u32 keylen, u8 cipher_type)
 {
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct cvm_enc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if ((keylen == 16) || (keylen == 24) || (keylen == 32)) {
-		ctx->key_len = keylen;
+	ctx->cipher_type = cipher_type;
+	if (!cvm_validate_keylen(ctx, keylen)) {
 		memcpy(ctx->enc_key, key, keylen);
 		return 0;
+	} else {
+		crypto_ablkcipher_set_flags(cipher,
+					    CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
 	}
-	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+}
 
-	return -EINVAL;
+static int cvm_cbc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, AES_CBC);
+}
+
+static int cvm_ecb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, AES_ECB);
+}
+
+static int cvm_cfb_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			      u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, AES_CFB);
+}
+
+static int cvm_cbc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			       u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, DES3_CBC);
+}
+
+static int cvm_ecb_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			       u32 keylen)
+{
+	return cvm_setkey(cipher, key, keylen, DES3_ECB);
 }
 
 int cvm_enc_dec_init(struct crypto_tfm *tfm)
@@ -349,8 +366,8 @@ struct crypto_alg algs[] = { {
 			.min_keysize = 2 * AES_MIN_KEY_SIZE,
 			.max_keysize = 2 * AES_MAX_KEY_SIZE,
 			.setkey = cvm_xts_setkey,
-			.encrypt = cvm_aes_encrypt_xts,
-			.decrypt = cvm_aes_decrypt_xts,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
 		},
 	},
 	.cra_init = cvm_enc_dec_init,
@@ -369,9 +386,51 @@ struct crypto_alg algs[] = { {
 			.ivsize = AES_BLOCK_SIZE,
 			.min_keysize = AES_MIN_KEY_SIZE,
 			.max_keysize = AES_MAX_KEY_SIZE,
-			.setkey = cvm_enc_dec_setkey,
-			.encrypt = cvm_aes_encrypt_cbc,
-			.decrypt = cvm_aes_decrypt_cbc,
+			.setkey = cvm_cbc_aes_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
+		},
+	},
+	.cra_init = cvm_enc_dec_init,
+	.cra_module = THIS_MODULE,
+}, {
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
+	.cra_alignmask = 7,
+	.cra_priority = 4001,
+	.cra_name = "ecb(aes)",
+	.cra_driver_name = "cavium-ecb-aes",
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_u = {
+		.ablkcipher = {
+			.ivsize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.setkey = cvm_ecb_aes_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
+		},
+	},
+	.cra_init = cvm_enc_dec_init,
+	.cra_module = THIS_MODULE,
+}, {
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cvm_enc_ctx),
+	.cra_alignmask = 7,
+	.cra_priority = 4001,
+	.cra_name = "cfb(aes)",
+	.cra_driver_name = "cavium-cfb-aes",
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_u = {
+		.ablkcipher = {
+			.ivsize = AES_BLOCK_SIZE,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.setkey = cvm_cfb_aes_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
 		},
 	},
 	.cra_init = cvm_enc_dec_init,
@@ -390,9 +449,30 @@ struct crypto_alg algs[] = { {
 			.min_keysize = DES3_EDE_KEY_SIZE,
 			.max_keysize = DES3_EDE_KEY_SIZE,
 			.ivsize = DES_BLOCK_SIZE,
-			.setkey = cvm_enc_dec_setkey,
-			.encrypt = cvm_des3_encrypt_cbc,
-			.decrypt = cvm_des3_decrypt_cbc,
+			.setkey = cvm_cbc_des3_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
+		},
+	},
+	.cra_init = cvm_enc_dec_init,
+	.cra_module = THIS_MODULE,
+}, {
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cvm_des3_ctx),
+	.cra_alignmask = 7,
+	.cra_priority = 4001,
+	.cra_name = "ecb(des3_ede)",
+	.cra_driver_name = "cavium-ecb-des3_ede",
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = DES3_EDE_KEY_SIZE,
+			.max_keysize = DES3_EDE_KEY_SIZE,
+			.ivsize = DES_BLOCK_SIZE,
+			.setkey = cvm_ecb_des3_setkey,
+			.encrypt = cvm_encrypt,
+			.decrypt = cvm_decrypt,
 		},
 	},
 	.cra_init = cvm_enc_dec_init,

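A note on the refactor above: cipher_type and key_type are now cached in cvm_enc_ctx at setkey time, so adding another mode only needs a thin setkey wrapper plus a crypto_alg entry that reuses cvm_encrypt()/cvm_decrypt(). A hedged sketch, not part of this commit (AES_CTR and cvm_ctr_aes_setkey are hypothetical names):

	/* Hypothetical: wiring one more AES mode into the refactored driver.
	 * AES_CTR and cvm_ctr_aes_setkey are illustrative, not from this series.
	 */
	static int cvm_ctr_aes_setkey(struct crypto_ablkcipher *cipher,
				      const u8 *key, u32 keylen)
	{
		/* cvm_setkey() validates keylen and records cipher_type/key_type */
		return cvm_setkey(cipher, key, keylen, AES_CTR);
	}

	/* ...and the matching crypto_alg entry would reuse the shared paths:
	 *	.setkey  = cvm_ctr_aes_setkey,
	 *	.encrypt = cvm_encrypt,
	 *	.decrypt = cvm_decrypt,
	 */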
+ 7 - 0
drivers/crypto/cavium/cpt/cptvf_algs.h

@@ -77,6 +77,11 @@ union encr_ctrl {
 	} e;
 };
 
+struct cvm_cipher {
+	const char *name;
+	u8 value;
+};
+
 struct enc_context {
 	union encr_ctrl enc_ctrl;
 	u8 encr_key[32];
@@ -96,6 +101,8 @@ struct fc_context {
 struct cvm_enc_ctx {
 	u32 key_len;
 	u8 enc_key[MAX_KEY_SIZE];
+	u8 cipher_type:4;
+	u8 key_type:2;
 };
 
 struct cvm_des3_ctx {

+ 1 - 1
drivers/crypto/cavium/cpt/cptvf_main.c

@@ -525,7 +525,7 @@ static irqreturn_t cptvf_misc_intr_handler(int irq, void *cptvf_irq)
 	intr = cptvf_read_vf_misc_intr_status(cptvf);
 	/*Check for MISC interrupt types*/
 	if (likely(intr & CPT_VF_INTR_MBOX_MASK)) {
-		dev_err(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n",
+		dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n",
 			intr, cptvf->vfid);
 		cptvf_handle_mbox_intr(cptvf);
 		cptvf_clear_mbox_intr(cptvf);

+ 21 - 0
drivers/crypto/cavium/nitrox/Kconfig

@@ -0,0 +1,21 @@
+#
+# Cavium NITROX Crypto Device configuration
+#
+config CRYPTO_DEV_NITROX
+	tristate
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AES
+	select CRYPTO_DES
+	select FW_LOADER
+
+config CRYPTO_DEV_NITROX_CNN55XX
+	tristate "Support for Cavium CNN55XX driver"
+	depends on PCI_MSI && 64BIT
+	select CRYPTO_DEV_NITROX
+	default m
+	help
+	  Support for the Cavium NITROX family CNN55XX adapters,
+	  which accelerate crypto workloads.
+
+	  To compile this as a module, choose M here: the module
+	  will be called n5pf.

+ 8 - 0
drivers/crypto/cavium/nitrox/Makefile

@@ -0,0 +1,8 @@
+obj-$(CONFIG_CRYPTO_DEV_NITROX_CNN55XX) += n5pf.o
+
+n5pf-objs := nitrox_main.o \
+	nitrox_isr.o \
+	nitrox_lib.o \
+	nitrox_hal.o \
+	nitrox_reqmgr.o \
+	nitrox_algs.o

+ 457 - 0
drivers/crypto/cavium/nitrox/nitrox_algs.c

@@ -0,0 +1,457 @@
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include <crypto/aes.h>
+#include <crypto/skcipher.h>
+#include <crypto/ctr.h>
+#include <crypto/des.h>
+#include <crypto/xts.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_common.h"
+#include "nitrox_req.h"
+
+#define PRIO 4001
+
+struct nitrox_cipher {
+	const char *name;
+	enum flexi_cipher value;
+};
+
+/**
+ * supported cipher list
+ */
+static const struct nitrox_cipher flexi_cipher_table[] = {
+	{ "null",		CIPHER_NULL },
+	{ "cbc(des3_ede)",	CIPHER_3DES_CBC },
+	{ "ecb(des3_ede)",	CIPHER_3DES_ECB },
+	{ "cbc(aes)",		CIPHER_AES_CBC },
+	{ "ecb(aes)",		CIPHER_AES_ECB },
+	{ "cfb(aes)",		CIPHER_AES_CFB },
+	{ "rfc3686(ctr(aes))",	CIPHER_AES_CTR },
+	{ "xts(aes)",		CIPHER_AES_XTS },
+	{ "cts(cbc(aes))",	CIPHER_AES_CBC_CTS },
+	{ NULL,			CIPHER_INVALID }
+};
+
+static enum flexi_cipher flexi_cipher_type(const char *name)
+{
+	const struct nitrox_cipher *cipher = flexi_cipher_table;
+
+	while (cipher->name) {
+		if (!strcmp(cipher->name, name))
+			break;
+		cipher++;
+	}
+	return cipher->value;
+}
+
+static int flexi_aes_keylen(int keylen)
+{
+	int aes_keylen;
+
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+		aes_keylen = 1;
+		break;
+	case AES_KEYSIZE_192:
+		aes_keylen = 2;
+		break;
+	case AES_KEYSIZE_256:
+		aes_keylen = 3;
+		break;
+	default:
+		aes_keylen = -EINVAL;
+		break;
+	}
+	return aes_keylen;
+}
+
+static int nitrox_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
+	void *fctx;
+
+	/* get the first device */
+	nctx->ndev = nitrox_get_first_device();
+	if (!nctx->ndev)
+		return -ENODEV;
+
+	/* allocate nitrox crypto context */
+	fctx = crypto_alloc_context(nctx->ndev);
+	if (!fctx) {
+		nitrox_put_device(nctx->ndev);
+		return -ENOMEM;
+	}
+	nctx->u.ctx_handle = (uintptr_t)fctx;
+	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(tfm) +
+				    sizeof(struct nitrox_kcrypt_request));
+	return 0;
+}
+
+static void nitrox_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(tfm);
+
+	/* free the nitrox crypto context */
+	if (nctx->u.ctx_handle) {
+		struct flexi_crypto_context *fctx = nctx->u.fctx;
+
+		memset(&fctx->crypto, 0, sizeof(struct crypto_keys));
+		memset(&fctx->auth, 0, sizeof(struct auth_keys));
+		crypto_free_context((void *)fctx);
+	}
+	nitrox_put_device(nctx->ndev);
+
+	nctx->u.ctx_handle = 0;
+	nctx->ndev = NULL;
+}
+
+static inline int nitrox_skcipher_setkey(struct crypto_skcipher *cipher,
+					 int aes_keylen, const u8 *key,
+					 unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm);
+	struct flexi_crypto_context *fctx;
+	enum flexi_cipher cipher_type;
+	const char *name;
+
+	name = crypto_tfm_alg_name(tfm);
+	cipher_type = flexi_cipher_type(name);
+	if (unlikely(cipher_type == CIPHER_INVALID)) {
+		pr_err("unsupported cipher: %s\n", name);
+		return -EINVAL;
+	}
+
+	/* fill crypto context */
+	fctx = nctx->u.fctx;
+	fctx->flags = 0;
+	fctx->w0.cipher_type = cipher_type;
+	fctx->w0.aes_keylen = aes_keylen;
+	fctx->w0.iv_source = IV_FROM_DPTR;
+	fctx->flags = cpu_to_be64(*(u64 *)&fctx->w0);
+	/* copy the key to context */
+	memcpy(fctx->crypto.u.key, key, keylen);
+
+	return 0;
+}
+
+static int nitrox_aes_setkey(struct crypto_skcipher *cipher, const u8 *key,
+			     unsigned int keylen)
+{
+	int aes_keylen;
+
+	aes_keylen = flexi_aes_keylen(keylen);
+	if (aes_keylen < 0) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
+}
+
+static void nitrox_skcipher_callback(struct skcipher_request *skreq,
+				     int err)
+{
+	if (err) {
+		pr_err_ratelimited("request failed status 0x%0x\n", err);
+		err = -EINVAL;
+	}
+	skcipher_request_complete(skreq, err);
+}
+
+static int nitrox_skcipher_crypt(struct skcipher_request *skreq, bool enc)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(skreq);
+	struct nitrox_crypto_ctx *nctx = crypto_skcipher_ctx(cipher);
+	struct nitrox_kcrypt_request *nkreq = skcipher_request_ctx(skreq);
+	int ivsize = crypto_skcipher_ivsize(cipher);
+	struct se_crypto_request *creq;
+
+	creq = &nkreq->creq;
+	creq->flags = skreq->base.flags;
+	creq->gfp = (skreq->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+		     GFP_KERNEL : GFP_ATOMIC;
+
+	/* fill the request */
+	creq->ctrl.value = 0;
+	creq->opcode = FLEXI_CRYPTO_ENCRYPT_HMAC;
+	creq->ctrl.s.arg = (enc ? ENCRYPT : DECRYPT);
+	/* param0: length of the data to be encrypted */
+	creq->gph.param0 = cpu_to_be16(skreq->cryptlen);
+	creq->gph.param1 = 0;
+	/* param2: encryption data offset */
+	creq->gph.param2 = cpu_to_be16(ivsize);
+	creq->gph.param3 = 0;
+
+	creq->ctx_handle = nctx->u.ctx_handle;
+	creq->ctrl.s.ctxl = sizeof(struct flexi_crypto_context);
+
+	/* copy the iv */
+	memcpy(creq->iv, skreq->iv, ivsize);
+	creq->ivsize = ivsize;
+	creq->src = skreq->src;
+	creq->dst = skreq->dst;
+
+	nkreq->nctx = nctx;
+	nkreq->skreq = skreq;
+
+	/* send the crypto request */
+	return nitrox_process_se_request(nctx->ndev, creq,
+					 nitrox_skcipher_callback, skreq);
+}
+
+static int nitrox_aes_encrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, true);
+}
+
+static int nitrox_aes_decrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, false);
+}
+
+static int nitrox_3des_setkey(struct crypto_skcipher *cipher,
+			      const u8 *key, unsigned int keylen)
+{
+	if (keylen != DES3_EDE_KEY_SIZE) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return nitrox_skcipher_setkey(cipher, 0, key, keylen);
+}
+
+static int nitrox_3des_encrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, true);
+}
+
+static int nitrox_3des_decrypt(struct skcipher_request *skreq)
+{
+	return nitrox_skcipher_crypt(skreq, false);
+}
+
+static int nitrox_aes_xts_setkey(struct crypto_skcipher *cipher,
+				 const u8 *key, unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm);
+	struct flexi_crypto_context *fctx;
+	int aes_keylen, ret;
+
+	ret = xts_check_key(tfm, key, keylen);
+	if (ret)
+		return ret;
+
+	keylen /= 2;
+
+	aes_keylen = flexi_aes_keylen(keylen);
+	if (aes_keylen < 0) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	fctx = nctx->u.fctx;
+	/* copy KEY2 */
+	memcpy(fctx->auth.u.key2, (key + keylen), keylen);
+
+	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
+}
+
+static int nitrox_aes_ctr_rfc3686_setkey(struct crypto_skcipher *cipher,
+					 const u8 *key, unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct nitrox_crypto_ctx *nctx = crypto_tfm_ctx(tfm);
+	struct flexi_crypto_context *fctx;
+	int aes_keylen;
+
+	if (keylen < CTR_RFC3686_NONCE_SIZE)
+		return -EINVAL;
+
+	fctx = nctx->u.fctx;
+
+	memcpy(fctx->crypto.iv, key + (keylen - CTR_RFC3686_NONCE_SIZE),
+	       CTR_RFC3686_NONCE_SIZE);
+
+	keylen -= CTR_RFC3686_NONCE_SIZE;
+
+	aes_keylen = flexi_aes_keylen(keylen);
+	if (aes_keylen < 0) {
+		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return nitrox_skcipher_setkey(cipher, aes_keylen, key, keylen);
+}
+
+static struct skcipher_alg nitrox_skciphers[] = { {
+	.base = {
+		.cra_name = "cbc(aes)",
+		.cra_driver_name = "n5_cbc(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "ecb(aes)",
+		.cra_driver_name = "n5_ecb(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "cfb(aes)",
+		.cra_driver_name = "n5_cfb(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "xts(aes)",
+		.cra_driver_name = "n5_xts(aes)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = 2 * AES_MIN_KEY_SIZE,
+	.max_keysize = 2 * AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_xts_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "rfc3686(ctr(aes))",
+		.cra_driver_name = "n5_rfc3686(ctr(aes))",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	.ivsize = CTR_RFC3686_IV_SIZE,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+	.setkey = nitrox_aes_ctr_rfc3686_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+}, {
+	.base = {
+		.cra_name = "cts(cbc(aes))",
+		.cra_driver_name = "n5_cts(cbc(aes))",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_type = &crypto_ablkcipher_type,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.setkey = nitrox_aes_setkey,
+	.encrypt = nitrox_aes_encrypt,
+	.decrypt = nitrox_aes_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "cbc(des3_ede)",
+		.cra_driver_name = "n5_cbc(des3_ede)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = DES3_EDE_KEY_SIZE,
+	.max_keysize = DES3_EDE_KEY_SIZE,
+	.ivsize = DES3_EDE_BLOCK_SIZE,
+	.setkey = nitrox_3des_setkey,
+	.encrypt = nitrox_3des_encrypt,
+	.decrypt = nitrox_3des_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}, {
+	.base = {
+		.cra_name = "ecb(des3_ede)",
+		.cra_driver_name = "n5_ecb(des3_ede)",
+		.cra_priority = PRIO,
+		.cra_flags = CRYPTO_ALG_ASYNC,
+		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct nitrox_crypto_ctx),
+		.cra_alignmask = 0,
+		.cra_module = THIS_MODULE,
+	},
+	.min_keysize = DES3_EDE_KEY_SIZE,
+	.max_keysize = DES3_EDE_KEY_SIZE,
+	.ivsize = DES3_EDE_BLOCK_SIZE,
+	.setkey = nitrox_3des_setkey,
+	.encrypt = nitrox_3des_encrypt,
+	.decrypt = nitrox_3des_decrypt,
+	.init = nitrox_skcipher_init,
+	.exit = nitrox_skcipher_exit,
+}
+
+};
+
+int nitrox_crypto_register(void)
+{
+	return crypto_register_skciphers(nitrox_skciphers,
+					 ARRAY_SIZE(nitrox_skciphers));
+}
+
+void nitrox_crypto_unregister(void)
+{
+	crypto_unregister_skciphers(nitrox_skciphers,
+				    ARRAY_SIZE(nitrox_skciphers));
+}

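Once registered, these transforms are reached through the generic skcipher API; nothing below is nitrox-specific. A minimal sketch, assuming kernel context, a block-aligned buffer and trimmed error handling (the n5_example_* names are illustrative only):

	#include <crypto/aes.h>
	#include <crypto/skcipher.h>
	#include <linux/completion.h>
	#include <linux/scatterlist.h>

	struct n5_example_result {
		struct completion done;
		int err;
	};

	static void n5_example_done(struct crypto_async_request *areq, int err)
	{
		struct n5_example_result *res = areq->data;

		if (err == -EINPROGRESS)
			return;	/* left the backlog; final completion follows */
		res->err = err;
		complete(&res->done);
	}

	/* Encrypt @len bytes (multiple of AES_BLOCK_SIZE) in place with cbc(aes). */
	static int n5_example_cbc_aes(u8 *buf, unsigned int len,
				      const u8 *key, u8 *iv)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		struct n5_example_result res;
		int ret;

		tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			crypto_free_skcipher(tfm);
			return -ENOMEM;
		}

		init_completion(&res.done);
		ret = crypto_skcipher_setkey(tfm, key, AES_KEYSIZE_128);
		if (ret)
			goto out;

		sg_init_one(&sg, buf, len);
		skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					      n5_example_done, &res);
		skcipher_request_set_crypt(req, &sg, &sg, len, iv);

		ret = crypto_skcipher_encrypt(req);
		if (ret == -EINPROGRESS || ret == -EBUSY) {
			wait_for_completion(&res.done);
			ret = res.err;
		}
	out:
		skcipher_request_free(req);
		crypto_free_skcipher(tfm);
		return ret;
	}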
+ 42 - 0
drivers/crypto/cavium/nitrox/nitrox_common.h

@@ -0,0 +1,42 @@
+#ifndef __NITROX_COMMON_H
+#define __NITROX_COMMON_H
+
+#include "nitrox_dev.h"
+#include "nitrox_req.h"
+
+int nitrox_crypto_register(void);
+void nitrox_crypto_unregister(void);
+void *crypto_alloc_context(struct nitrox_device *ndev);
+void crypto_free_context(void *ctx);
+struct nitrox_device *nitrox_get_first_device(void);
+void nitrox_put_device(struct nitrox_device *ndev);
+
+void nitrox_pf_cleanup_isr(struct nitrox_device *ndev);
+int nitrox_pf_init_isr(struct nitrox_device *ndev);
+
+int nitrox_common_sw_init(struct nitrox_device *ndev);
+void nitrox_common_sw_cleanup(struct nitrox_device *ndev);
+
+void pkt_slc_resp_handler(unsigned long data);
+int nitrox_process_se_request(struct nitrox_device *ndev,
+			      struct se_crypto_request *req,
+			      completion_t cb,
+			      struct skcipher_request *skreq);
+void backlog_qflush_work(struct work_struct *work);
+
+void nitrox_config_emu_unit(struct nitrox_device *ndev);
+void nitrox_config_pkt_input_rings(struct nitrox_device *ndev);
+void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev);
+void nitrox_config_vfmode(struct nitrox_device *ndev, int mode);
+void nitrox_config_nps_unit(struct nitrox_device *ndev);
+void nitrox_config_pom_unit(struct nitrox_device *ndev);
+void nitrox_config_rand_unit(struct nitrox_device *ndev);
+void nitrox_config_efl_unit(struct nitrox_device *ndev);
+void nitrox_config_bmi_unit(struct nitrox_device *ndev);
+void nitrox_config_bmo_unit(struct nitrox_device *ndev);
+void nitrox_config_lbc_unit(struct nitrox_device *ndev);
+void invalidate_lbc(struct nitrox_device *ndev);
+void enable_pkt_input_ring(struct nitrox_device *ndev, int ring);
+void enable_pkt_solicit_port(struct nitrox_device *ndev, int port);
+
+#endif /* __NITROX_COMMON_H */

+ 1084 - 0
drivers/crypto/cavium/nitrox/nitrox_csr.h

@@ -0,0 +1,1084 @@
+#ifndef __NITROX_CSR_H
+#define __NITROX_CSR_H
+
+#include <asm/byteorder.h>
+#include <linux/types.h>
+
+/* EMU clusters */
+#define NR_CLUSTERS		4
+#define AE_CORES_PER_CLUSTER	20
+#define SE_CORES_PER_CLUSTER	16
+
+/* BIST registers */
+#define EMU_BIST_STATUSX(_i)	(0x1402700 + ((_i) * 0x40000))
+#define UCD_BIST_STATUS		0x12C0070
+#define NPS_CORE_BIST_REG	0x10000E8
+#define NPS_CORE_NPC_BIST_REG	0x1000128
+#define NPS_PKT_SLC_BIST_REG	0x1040088
+#define NPS_PKT_IN_BIST_REG	0x1040100
+#define POM_BIST_REG		0x11C0100
+#define BMI_BIST_REG		0x1140080
+#define EFL_CORE_BIST_REGX(_i)	(0x1240100 + ((_i) * 0x400))
+#define EFL_TOP_BIST_STAT	0x1241090
+#define BMO_BIST_REG		0x1180080
+#define LBC_BIST_STATUS		0x1200020
+#define PEM_BIST_STATUSX(_i)	(0x1080468 | ((_i) << 18))
+
+/* EMU registers */
+#define EMU_SE_ENABLEX(_i)	(0x1400000 + ((_i) * 0x40000))
+#define EMU_AE_ENABLEX(_i)	(0x1400008 + ((_i) * 0x40000))
+#define EMU_WD_INT_ENA_W1SX(_i)	(0x1402318 + ((_i) * 0x40000))
+#define EMU_GE_INT_ENA_W1SX(_i)	(0x1402518 + ((_i) * 0x40000))
+#define EMU_FUSE_MAPX(_i)	(0x1402708 + ((_i) * 0x40000))
+
+/* UCD registers */
+#define UCD_UCODE_LOAD_BLOCK_NUM	0x12C0010
+#define UCD_UCODE_LOAD_IDX_DATAX(_i)	(0x12C0018 + ((_i) * 0x20))
+#define UCD_SE_EID_UCODE_BLOCK_NUMX(_i)	(0x12C0000 + ((_i) * 0x1000))
+
+/* NPS core registers */
+#define NPS_CORE_GBL_VFCFG	0x1000000
+#define NPS_CORE_CONTROL	0x1000008
+#define NPS_CORE_INT_ACTIVE	0x1000080
+#define NPS_CORE_INT		0x10000A0
+#define NPS_CORE_INT_ENA_W1S	0x10000B8
+#define NPS_STATS_PKT_DMA_RD_CNT	0x1000180
+#define NPS_STATS_PKT_DMA_WR_CNT	0x1000190
+
+/* NPS packet registers */
+#define NPS_PKT_INT				0x1040018
+#define NPS_PKT_IN_RERR_HI		0x1040108
+#define NPS_PKT_IN_RERR_HI_ENA_W1S	0x1040120
+#define NPS_PKT_IN_RERR_LO		0x1040128
+#define NPS_PKT_IN_RERR_LO_ENA_W1S	0x1040140
+#define NPS_PKT_IN_ERR_TYPE		0x1040148
+#define NPS_PKT_IN_ERR_TYPE_ENA_W1S	0x1040160
+#define NPS_PKT_IN_INSTR_CTLX(_i)	(0x10060 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INSTR_BADDRX(_i)	(0x10068 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INSTR_RSIZEX(_i)	(0x10070 + ((_i) * 0x40000))
+#define NPS_PKT_IN_DONE_CNTSX(_i)	(0x10080 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INSTR_BAOFF_DBELLX(_i)	(0x10078 + ((_i) * 0x40000))
+#define NPS_PKT_IN_INT_LEVELSX(_i)		(0x10088 + ((_i) * 0x40000))
+
+#define NPS_PKT_SLC_RERR_HI		0x1040208
+#define NPS_PKT_SLC_RERR_HI_ENA_W1S	0x1040220
+#define NPS_PKT_SLC_RERR_LO		0x1040228
+#define NPS_PKT_SLC_RERR_LO_ENA_W1S	0x1040240
+#define NPS_PKT_SLC_ERR_TYPE		0x1040248
+#define NPS_PKT_SLC_ERR_TYPE_ENA_W1S	0x1040260
+#define NPS_PKT_SLC_CTLX(_i)		(0x10000 + ((_i) * 0x40000))
+#define NPS_PKT_SLC_CNTSX(_i)		(0x10008 + ((_i) * 0x40000))
+#define NPS_PKT_SLC_INT_LEVELSX(_i)	(0x10010 + ((_i) * 0x40000))
+
+/* POM registers */
+#define POM_INT_ENA_W1S		0x11C0018
+#define POM_GRP_EXECMASKX(_i)	(0x11C1100 | ((_i) * 8))
+#define POM_INT		0x11C0000
+#define POM_PERF_CTL	0x11CC400
+
+/* BMI registers */
+#define BMI_INT		0x1140000
+#define BMI_CTL		0x1140020
+#define BMI_INT_ENA_W1S	0x1140018
+#define BMI_NPS_PKT_CNT	0x1140070
+
+/* EFL registers */
+#define EFL_CORE_INT_ENA_W1SX(_i)		(0x1240018 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT0X(_i)		(0x1240050 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT0_ENA_W1SX(_i)	(0x1240068 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT1X(_i)		(0x1240070 + ((_i) * 0x400))
+#define EFL_CORE_VF_ERR_INT1_ENA_W1SX(_i)	(0x1240088 + ((_i) * 0x400))
+#define EFL_CORE_SE_ERR_INTX(_i)		(0x12400A0 + ((_i) * 0x400))
+#define EFL_RNM_CTL_STATUS			0x1241800
+#define EFL_CORE_INTX(_i)			(0x1240000 + ((_i) * 0x400))
+
+/* BMO registers */
+#define BMO_CTL2		0x1180028
+#define BMO_NPS_SLC_PKT_CNT	0x1180078
+
+/* LBC registers */
+#define LBC_INT			0x1200000
+#define LBC_INVAL_CTL		0x1201010
+#define LBC_PLM_VF1_64_INT	0x1202008
+#define LBC_INVAL_STATUS	0x1202010
+#define LBC_INT_ENA_W1S		0x1203000
+#define LBC_PLM_VF1_64_INT_ENA_W1S	0x1205008
+#define LBC_PLM_VF65_128_INT		0x1206008
+#define LBC_ELM_VF1_64_INT		0x1208000
+#define LBC_PLM_VF65_128_INT_ENA_W1S	0x1209008
+#define LBC_ELM_VF1_64_INT_ENA_W1S	0x120B000
+#define LBC_ELM_VF65_128_INT		0x120C000
+#define LBC_ELM_VF65_128_INT_ENA_W1S	0x120F000
+
+/* PEM registers */
+#define PEM0_INT 0x1080428
+
+/**
+ * struct emu_fuse_map - EMU Fuse Map Registers
+ * @ae_fuse: Fuse settings for AE 19..0
+ * @se_fuse: Fuse settings for SE 15..0
+ *
+ * A set bit indicates the unit is fuse disabled.
+ */
+union emu_fuse_map {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 valid : 1;
+		u64 raz_52_62 : 11;
+		u64 ae_fuse : 20;
+		u64 raz_16_31 : 16;
+		u64 se_fuse : 16;
+#else
+		u64 se_fuse : 16;
+		u64 raz_16_31 : 16;
+		u64 ae_fuse : 20;
+		u64 raz_52_62 : 11;
+		u64 valid : 1;
+#endif
+	} s;
+};
+
+/**
+ * struct emu_se_enable - Symmetric Engine Enable Registers
+ * @enable: Individual enables for each of the cluster's
+ *   16 symmetric engines.
+ */
+union emu_se_enable {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 48;
+		u64 enable : 16;
+#else
+		u64 enable : 16;
+		u64 raz	: 48;
+#endif
+	} s;
+};
+
+/**
+ * struct emu_ae_enable - EMU Asymmetric engines.
+ * @enable: Individual enables for each of the cluster's
+ *   20 Asymmetric Engines.
+ */
+union emu_ae_enable {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 44;
+		u64 enable : 20;
+#else
+		u64 enable : 20;
+		u64 raz	: 44;
+#endif
+	} s;
+};
+
+/**
+ * struct emu_wd_int_ena_w1s - EMU Interrupt Enable Registers
+ * @ae_wd: Reads or sets enable for EMU(0..3)_WD_INT[AE_WD]
+ * @se_wd: Reads or sets enable for EMU(0..3)_WD_INT[SE_WD]
+ */
+union emu_wd_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz2 : 12;
+		u64 ae_wd : 20;
+		u64 raz1 : 16;
+		u64 se_wd : 16;
+#else
+		u64 se_wd : 16;
+		u64 raz1 : 16;
+		u64 ae_wd : 20;
+		u64 raz2 : 12;
+#endif
+	} s;
+};
+
+/**
+ * struct emu_ge_int_ena_w1s - EMU Interrupt Enable set registers
+ * @ae_ge: Reads or sets enable for EMU(0..3)_GE_INT[AE_GE]
+ * @se_ge: Reads or sets enable for EMU(0..3)_GE_INT[SE_GE]
+ */
+union emu_ge_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_52_63 : 12;
+		u64 ae_ge : 20;
+		u64 raz_16_31: 16;
+		u64 se_ge : 16;
+#else
+		u64 se_ge : 16;
+		u64 raz_16_31: 16;
+		u64 ae_ge : 20;
+		u64 raz_52_63 : 12;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_slc_ctl - Solicited Packet Out Control Registers
+ * @rh: Indicates whether to remove or include the response header
+ *   1 = Include, 0 = Remove
+ * @z: If set, 8 trailing 0x00 bytes will be added to the end of the
+ *   outgoing packet.
+ * @enb: Enable for this port.
+ */
+union nps_pkt_slc_ctl {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz : 61;
+		u64 rh : 1;
+		u64 z : 1;
+		u64 enb : 1;
+#else
+		u64 enb : 1;
+		u64 z : 1;
+		u64 rh : 1;
+		u64 raz : 61;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_slc_cnts - Solicited Packet Out Count Registers
+ * @slc_int: Returns a 1 when:
+ *   NPS_PKT_SLC(i)_CNTS[CNT] > NPS_PKT_SLC(i)_INT_LEVELS[CNT], or
+ *   NPS_PKT_SLC(i)_CNTS[TIMER] > NPS_PKT_SLC(i)_INT_LEVELS[TIMET].
+ *   To clear the bit, the CNTS register must be written to clear.
+ * @in_int: Returns a 1 when:
+ *   NPS_PKT_IN(i)_DONE_CNTS[CNT] > NPS_PKT_IN(i)_INT_LEVELS[CNT].
+ *   To clear the bit, the DONE_CNTS register must be written to clear.
+ * @mbox_int: Returns a 1 when:
+ *   NPS_PKT_MBOX_PF_VF(i)_INT[INTR] is set. To clear the bit,
+ *   write NPS_PKT_MBOX_PF_VF(i)_INT[INTR] with 1.
+ * @timer: Timer, incremented every 2048 coprocessor clock cycles
+ *   when [CNT] is not zero. The hardware clears both [TIMER] and
+ *   [INT] when [CNT] goes to 0.
+ * @cnt: Packet counter. Hardware adds to [CNT] as it sends packets out.
+ *   On a write to this CSR, hardware subtracts the amount written to the
+ *   [CNT] field from [CNT].
+ */
+union nps_pkt_slc_cnts {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 slc_int : 1;
+		u64 uns_int : 1;
+		u64 in_int : 1;
+		u64 mbox_int : 1;
+		u64 resend : 1;
+		u64 raz : 5;
+		u64 timer : 22;
+		u64 cnt : 32;
+#else
+		u64 cnt	: 32;
+		u64 timer : 22;
+		u64 raz	: 5;
+		u64 resend : 1;
+		u64 mbox_int : 1;
+		u64 in_int : 1;
+		u64 uns_int : 1;
+		u64 slc_int : 1;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_slc_int_levels - Solicited Packet Out Interrupt Levels
+ *   Registers.
+ * @bmode: Determines whether NPS_PKT_SLC_CNTS[CNT] is a byte or
+ *   packet counter.
+ * @timet: Output port counter time interrupt threshold.
+ * @cnt: Output port counter interrupt threshold.
+ */
+union nps_pkt_slc_int_levels {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 bmode : 1;
+		u64 raz	: 9;
+		u64 timet : 22;
+		u64 cnt	: 32;
+#else
+		u64 cnt : 32;
+		u64 timet : 22;
+		u64 raz : 9;
+		u64 bmode : 1;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_inst - NPS Packet Interrupt Register
+ * @in_err: Set when any NPS_PKT_IN_RERR_HI/LO bit and
+ *    corresponding NPS_PKT_IN_RERR_*_ENA_* bit are both set.
+ * @uns_err: Set when any NSP_PKT_UNS_RERR_HI/LO bit and
+ *    corresponding NPS_PKT_UNS_RERR_*_ENA_* bit are both set.
+ * @slc_err: Set when any NPS_PKT_SLC_RERR_HI/LO bit and
+ *    corresponding NPS_PKT_SLC_RERR_*_ENA_* bit are both set.
+ */
+union nps_pkt_int {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz	: 54;
+		u64 uns_wto : 1;
+		u64 in_err : 1;
+		u64 uns_err : 1;
+		u64 slc_err : 1;
+		u64 in_dbe : 1;
+		u64 in_sbe : 1;
+		u64 uns_dbe : 1;
+		u64 uns_sbe : 1;
+		u64 slc_dbe : 1;
+		u64 slc_sbe : 1;
+#else
+		u64 slc_sbe : 1;
+		u64 slc_dbe : 1;
+		u64 uns_sbe : 1;
+		u64 uns_dbe : 1;
+		u64 in_sbe : 1;
+		u64 in_dbe : 1;
+		u64 slc_err : 1;
+		u64 uns_err : 1;
+		u64 in_err : 1;
+		u64 uns_wto : 1;
+		u64 raz	: 54;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_in_done_cnts - Input instruction ring counts registers
+ * @slc_cnt: Returns a 1 when:
+ *    NPS_PKT_SLC(i)_CNTS[CNT] > NPS_PKT_SLC(i)_INT_LEVELS[CNT], or
+ *    NPS_PKT_SLC(i)_CNTS[TIMER] > NPS_PKT_SCL(i)_INT_LEVELS[TIMET]
+ *    To clear the bit, the CNTS register must be
+ *    written to clear the underlying condition
+ * @uns_int: Returns a 1 when:
+ *    NPS_PKT_UNS(i)_CNTS[CNT] > NPS_PKT_UNS(i)_INT_LEVELS[CNT], or
+ *    NPS_PKT_UNS(i)_CNTS[TIMER] > NPS_PKT_UNS(i)_INT_LEVELS[TIMET]
+ *    To clear the bit, the CNTS register must be
+ *    written to clear the underlying condition
+ * @in_int: Returns a 1 when:
+ *    NPS_PKT_IN(i)_DONE_CNTS[CNT] > NPS_PKT_IN(i)_INT_LEVELS[CNT]
+ *    To clear the bit, the DONE_CNTS register
+ *    must be written to clear the underlying condition
+ * @mbox_int: Returns a 1 when:
+ *    NPS_PKT_MBOX_PF_VF(i)_INT[INTR] is set.
+ *    To clear the bit, write NPS_PKT_MBOX_PF_VF(i)_INT[INTR]
+ *    with 1.
+ * @resend: A write of 1 will resend an MSI-X interrupt message if any
+ *    of the following conditions are true for this ring "i".
+ *    NPS_PKT_SLC(i)_CNTS[CNT] > NPS_PKT_SLC(i)_INT_LEVELS[CNT]
+ *    NPS_PKT_SLC(i)_CNTS[TIMER] > NPS_PKT_SLC(i)_INT_LEVELS[TIMET]
+ *    NPS_PKT_UNS(i)_CNTS[CNT] > NPS_PKT_UNS(i)_INT_LEVELS[CNT]
+ *    NPS_PKT_UNS(i)_CNTS[TIMER] > NPS_PKT_UNS(i)_INT_LEVELS[TIMET]
+ *    NPS_PKT_IN(i)_DONE_CNTS[CNT] > NPS_PKT_IN(i)_INT_LEVELS[CNT]
+ *    NPS_PKT_MBOX_PF_VF(i)_INT[INTR] is set
+ * @cnt: Packet counter. Hardware adds to [CNT] as it reads
+ *    packets. On a write to this CSR, hardware subtracts the
+ *    amount written to the [CNT] field from [CNT], which will
+ *    clear PKT_IN(i)_INT_STATUS[INTR] if [CNT] becomes <=
+ *    NPS_PKT_IN(i)_INT_LEVELS[CNT]. This register should be
+ *    cleared before enabling a ring by reading the current
+ *    value and writing it back.
+ */
+union nps_pkt_in_done_cnts {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 slc_int : 1;
+		u64 uns_int : 1;
+		u64 in_int : 1;
+		u64 mbox_int : 1;
+		u64 resend : 1;
+		u64 raz : 27;
+		u64 cnt	: 32;
+#else
+		u64 cnt	: 32;
+		u64 raz	: 27;
+		u64 resend : 1;
+		u64 mbox_int : 1;
+		u64 in_int : 1;
+		u64 uns_int : 1;
+		u64 slc_int : 1;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_in_instr_ctl - Input Instruction Ring Control Registers.
+ * @is64b: If 1, the ring uses 64-byte instructions. If 0, the
+ *   ring uses 32-byte instructions.
+ * @enb: Enable for the input ring.
+ */
+union nps_pkt_in_instr_ctl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 62;
+		u64 is64b : 1;
+		u64 enb	: 1;
+#else
+		u64 enb	: 1;
+		u64 is64b : 1;
+		u64 raz : 62;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_in_instr_rsize - Input instruction ring size registers
+ * @rsize: Ring size (number of instructions)
+ */
+union nps_pkt_in_instr_rsize {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 32;
+		u64 rsize : 32;
+#else
+		u64 rsize : 32;
+		u64 raz	: 32;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_pkt_in_instr_baoff_dbell - Input instruction ring
+ *   base address offset and doorbell registers
+ * @aoff: Address offset. The offset from the NPS_PKT_IN_INSTR_BADDR
+ *   where the next pointer is read.
+ * @dbell: Pointer list doorbell count. Write operations to this field
+ *   increment the present value here. Read operations return the
+ *   present value.
+ */
+union nps_pkt_in_instr_baoff_dbell {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 aoff : 32;
+		u64 dbell : 32;
+#else
+		u64 dbell : 32;
+		u64 aoff : 32;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_core_int_ena_w1s - NPS core interrupt enable set register
+ * @host_nps_wr_err: Reads or sets enable for
+ *   NPS_CORE_INT[HOST_NPS_WR_ERR].
+ * @npco_dma_malform: Reads or sets enable for
+ *   NPS_CORE_INT[NPCO_DMA_MALFORM].
+ * @exec_wr_timeout: Reads or sets enable for
+ *   NPS_CORE_INT[EXEC_WR_TIMEOUT].
+ * @host_wr_timeout: Reads or sets enable for
+ *   NPS_CORE_INT[HOST_WR_TIMEOUT].
+ * @host_wr_err: Reads or sets enable for
+ *   NPS_CORE_INT[HOST_WR_ERR]
+ */
+union nps_core_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz4 : 55;
+		u64 host_nps_wr_err : 1;
+		u64 npco_dma_malform : 1;
+		u64 exec_wr_timeout : 1;
+		u64 host_wr_timeout : 1;
+		u64 host_wr_err : 1;
+		u64 raz3 : 1;
+		u64 raz2 : 1;
+		u64 raz1 : 1;
+		u64 raz0 : 1;
+#else
+		u64 raz0 : 1;
+		u64 raz1 : 1;
+		u64 raz2 : 1;
+		u64 raz3 : 1;
+		u64 host_wr_err	: 1;
+		u64 host_wr_timeout : 1;
+		u64 exec_wr_timeout : 1;
+		u64 npco_dma_malform : 1;
+		u64 host_nps_wr_err : 1;
+		u64 raz4 : 55;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_core_gbl_vfcfg - Global VF Configuration Register.
+ * @ilk_disable: When set, this bit indicates that the ILK interface has
+ *    been disabled.
+ * @obaf: BMO allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @ibaf: BMI allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @zaf: ZIP allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @aeaf: AE allocation control
+ *    0 = allocate per queue
+ *    1 = allocate per VF
+ * @seaf: SE allocation control
+ *    0 = allocation per queue
+ *    1 = allocate per VF
+ * @cfg: VF/PF mode.
+ */
+union nps_core_gbl_vfcfg {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64  raz :55;
+		u64  ilk_disable :1;
+		u64  obaf :1;
+		u64  ibaf :1;
+		u64  zaf :1;
+		u64  aeaf :1;
+		u64  seaf :1;
+		u64  cfg :3;
+#else
+		u64  cfg :3;
+		u64  seaf :1;
+		u64  aeaf :1;
+		u64  zaf :1;
+		u64  ibaf :1;
+		u64  obaf :1;
+		u64  ilk_disable :1;
+		u64  raz :55;
+#endif
+	} s;
+};
+
+/**
+ * struct nps_core_int_active - NPS Core Interrupt Active Register
+ * @resend: Resend MSI-X interrupt if interrupts still need to be handled.
+ *    Software can set this bit and then exit the ISR.
+ * @ocla: Set when any OCLA(0)_INT and corresponding OCLA(0)_INT_ENA_W1C
+ *    bit are set
+ * @mbox: Set when any NPS_PKT_MBOX_INT_LO/HI and corresponding
+ *    NPS_PKT_MBOX_INT_LO_ENA_W1C/HI_ENA_W1C bits are set
+ * @emu: bit i is set in [EMU] when any EMU(i)_INT bit is set
+ * @bmo: Set when any BMO_INT bit is set
+ * @bmi: Set when any BMI_INT bit is set or when any non-RO
+ *    BMI_INT and corresponding BMI_INT_ENA_W1C bits are both set
+ * @aqm: Set when any AQM_INT bit is set
+ * @zqm: Set when any ZQM_INT bit is set
+ * @efl: Set when any EFL_INT RO bit is set or when any non-RO EFL_INT
+ *    and corresponding EFL_INT_ENA_W1C bits are both set
+ * @ilk: Set when any ILK_INT bit is set
+ * @lbc: Set when any LBC_INT RO bit is set or when any non-RO LBC_INT
+ *    and corresponding LBC_INT_ENA_W1C bits are both set
+ * @pem: Set when any PEM(0)_INT RO bit is set or when any non-RO
+ *    PEM(0)_INT and corresponding PEM(0)_INT_ENA_W1C bit are both set
+ * @ucd: Set when any UCD_INT bit is set
+ * @zctl: Set when any ZIP_INT RO bit is set or when any non-RO ZIP_INT
+ *    and corresponding ZIP_INT_ENA_W1C bits are both set
+ * @lbm: Set when any LBM_INT bit is set
+ * @nps_pkt: Set when any NPS_PKT_INT bit is set
+ * @nps_core: Set when any NPS_CORE_INT RO bit is set or when non-RO
+ *    NPS_CORE_INT and corresponding NSP_CORE_INT_ENA_W1C bits are both set
+ */
+union nps_core_int_active {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 resend : 1;
+		u64 raz	: 43;
+		u64 ocla : 1;
+		u64 mbox : 1;
+		u64 emu	: 4;
+		u64 bmo	: 1;
+		u64 bmi	: 1;
+		u64 aqm	: 1;
+		u64 zqm	: 1;
+		u64 efl	: 1;
+		u64 ilk	: 1;
+		u64 lbc	: 1;
+		u64 pem	: 1;
+		u64 pom	: 1;
+		u64 ucd	: 1;
+		u64 zctl : 1;
+		u64 lbm	: 1;
+		u64 nps_pkt : 1;
+		u64 nps_core : 1;
+#else
+		u64 nps_core : 1;
+		u64 nps_pkt : 1;
+		u64 lbm	: 1;
+		u64 zctl: 1;
+		u64 ucd	: 1;
+		u64 pom	: 1;
+		u64 pem	: 1;
+		u64 lbc	: 1;
+		u64 ilk	: 1;
+		u64 efl	: 1;
+		u64 zqm	: 1;
+		u64 aqm	: 1;
+		u64 bmi	: 1;
+		u64 bmo	: 1;
+		u64 emu	: 4;
+		u64 mbox : 1;
+		u64 ocla : 1;
+		u64 raz	: 43;
+		u64 resend : 1;
+#endif
+	} s;
+};
+
+/**
+ * struct efl_core_int - EFL Interrupt Registers
+ * @epci_decode_err: EPCI decoded a transaction that was unknown.
+ *    This error should only occur when there is a microcode/SE error
+ *    and should be considered fatal
+ * @ae_err: An AE uncorrectable error occurred.
+ *    See EFL_CORE(0..3)_AE_ERR_INT
+ * @se_err: An SE uncorrectable error occurred.
+ *    See EFL_CORE(0..3)_SE_ERR_INT
+ * @dbe: Double-bit error occurred in EFL
+ * @sbe: Single-bit error occurred in EFL
+ * @d_left: Asserted when new POM-Header-BMI-data is
+ *    being sent to an Exec, and that Exec has not read all BMI
+ *    data associated with the previous POM header
+ * @len_ovr: Asserted when an Exec-Read is issued that is more than
+ *    14 greater in length than the BMI data left to be read
+ */
+union efl_core_int {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz	: 57;
+		u64 epci_decode_err : 1;
+		u64 ae_err : 1;
+		u64 se_err : 1;
+		u64 dbe	: 1;
+		u64 sbe	: 1;
+		u64 d_left : 1;
+		u64 len_ovr : 1;
+#else
+		u64 len_ovr : 1;
+		u64 d_left : 1;
+		u64 sbe	: 1;
+		u64 dbe	: 1;
+		u64 se_err : 1;
+		u64 ae_err : 1;
+		u64 epci_decode_err  : 1;
+		u64 raz	: 57;
+#endif
+	} s;
+};
+
+/**
+ * struct efl_core_int_ena_w1s - EFL core interrupt enable set register
+ * @epci_decode_err: Reads or sets enable for
+ *   EFL_CORE(0..3)_INT[EPCI_DECODE_ERR].
+ * @d_left: Reads or sets enable for
+ *   EFL_CORE(0..3)_INT[D_LEFT].
+ * @len_ovr: Reads or sets enable for
+ *   EFL_CORE(0..3)_INT[LEN_OVR].
+ */
+union efl_core_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_7_63 : 57;
+		u64 epci_decode_err : 1;
+		u64 raz_2_5 : 4;
+		u64 d_left : 1;
+		u64 len_ovr : 1;
+#else
+		u64 len_ovr : 1;
+		u64 d_left : 1;
+		u64 raz_2_5 : 4;
+		u64 epci_decode_err : 1;
+		u64 raz_7_63 : 57;
+#endif
+	} s;
+};
+
+/**
+ * struct efl_rnm_ctl_status - RNM Control and Status Register
+ * @ent_sel: Select input to RNM FIFO
+ * @exp_ent: Exported entropy enable for random number generator
+ * @rng_rst: Reset to RNG. Setting this bit to 1 cancels the generation
+ *    of the current random number.
+ * @rnm_rst: Reset the RNM. Setting this bit to 1 clears all sorted numbers
+ *    in the random number memory.
+ * @rng_en: Enables the output of the RNG.
+ * @ent_en: Entropy enable for random number generator.
+ */
+union efl_rnm_ctl_status {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_9_63 : 55;
+		u64 ent_sel : 4;
+		u64 exp_ent : 1;
+		u64 rng_rst : 1;
+		u64 rnm_rst : 1;
+		u64 rng_en : 1;
+		u64 ent_en : 1;
+#else
+		u64 ent_en : 1;
+		u64 rng_en : 1;
+		u64 rnm_rst : 1;
+		u64 rng_rst : 1;
+		u64 exp_ent : 1;
+		u64 ent_sel : 4;
+		u64 raz_9_63 : 55;
+#endif
+	} s;
+};
+
+/**
+ * struct bmi_ctl - BMI control register
+ * @ilk_hdrq_thrsh: Maximum number of header queue locations
+ *   that ILK packets may consume. When the threshold is
+ *   exceeded ILK_XOFF is sent to the BMI_X2P_ARB.
+ * @nps_hdrq_thrsh: Maximum number of header queue locations
+ *   that NPS packets may consume. When the threshold is
+ *   exceeded NPS_XOFF is sent to the BMI_X2P_ARB.
+ * @totl_hdrq_thrsh: Maximum number of header queue locations
+ *   that the sum of ILK and NPS packets may consume.
+ * @ilk_free_thrsh: Maximum number of buffers that ILK packet
+ *   flows may consume before ILK_XOFF is sent to the BMI_X2P_ARB.
+ * @nps_free_thrsh: Maximum number of buffers that NPS packet
+ *   flows may consume before NPS XOFF is sent to the BMI_X2p_ARB.
+ * @totl_free_thrsh: Maximum number of buffers that both ILK and NPS
+ *   packet flows may consume before both NPS_XOFF and ILK_XOFF
+ *   are asserted to the BMI_X2P_ARB.
+ * @max_pkt_len: Maximum packet length, integral number of 256B
+ *   buffers.
+ */
+union bmi_ctl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_56_63 : 8;
+		u64 ilk_hdrq_thrsh : 8;
+		u64 nps_hdrq_thrsh : 8;
+		u64 totl_hdrq_thrsh : 8;
+		u64 ilk_free_thrsh : 8;
+		u64 nps_free_thrsh : 8;
+		u64 totl_free_thrsh : 8;
+		u64 max_pkt_len : 8;
+#else
+		u64 max_pkt_len : 8;
+		u64 totl_free_thrsh : 8;
+		u64 nps_free_thrsh : 8;
+		u64 ilk_free_thrsh : 8;
+		u64 totl_hdrq_thrsh : 8;
+		u64 nps_hdrq_thrsh : 8;
+		u64 ilk_hdrq_thrsh : 8;
+		u64 raz_56_63 : 8;
+#endif
+	} s;
+};
+
+/**
+ * struct bmi_int_ena_w1s - BMI interrupt enable set register
+ * @ilk_req_oflw: Reads or sets enable for
+ *   BMI_INT[ILK_REQ_OFLW].
+ * @nps_req_oflw: Reads or sets enable for
+ *   BMI_INT[NPS_REQ_OFLW].
+ * @fpf_undrrn: Reads or sets enable for
+ *   BMI_INT[FPF_UNDRRN].
+ * @eop_err_ilk: Reads or sets enable for
+ *   BMI_INT[EOP_ERR_ILK].
+ * @eop_err_nps: Reads or sets enable for
+ *   BMI_INT[EOP_ERR_NPS].
+ * @sop_err_ilk: Reads or sets enable for
+ *   BMI_INT[SOP_ERR_ILK].
+ * @sop_err_nps: Reads or sets enable for
+ *   BMI_INT[SOP_ERR_NPS].
+ * @pkt_rcv_err_ilk: Reads or sets enable for
+ *   BMI_INT[PKT_RCV_ERR_ILK].
+ * @pkt_rcv_err_nps: Reads or sets enable for
+ *   BMI_INT[PKT_RCV_ERR_NPS].
+ * @max_len_err_ilk: Reads or sets enable for
+ *   BMI_INT[MAX_LEN_ERR_ILK].
+ * @max_len_err_nps: Reads or sets enable for
+ *   BMI_INT[MAX_LEN_ERR_NPS].
+ */
+union bmi_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_13_63	: 51;
+		u64 ilk_req_oflw : 1;
+		u64 nps_req_oflw : 1;
+		u64 raz_10 : 1;
+		u64 raz_9 : 1;
+		u64 fpf_undrrn	: 1;
+		u64 eop_err_ilk	: 1;
+		u64 eop_err_nps	: 1;
+		u64 sop_err_ilk	: 1;
+		u64 sop_err_nps	: 1;
+		u64 pkt_rcv_err_ilk : 1;
+		u64 pkt_rcv_err_nps : 1;
+		u64 max_len_err_ilk : 1;
+		u64 max_len_err_nps : 1;
+#else
+		u64 max_len_err_nps : 1;
+		u64 max_len_err_ilk : 1;
+		u64 pkt_rcv_err_nps : 1;
+		u64 pkt_rcv_err_ilk : 1;
+		u64 sop_err_nps	: 1;
+		u64 sop_err_ilk	: 1;
+		u64 eop_err_nps	: 1;
+		u64 eop_err_ilk	: 1;
+		u64 fpf_undrrn	: 1;
+		u64 raz_9 : 1;
+		u64 raz_10 : 1;
+		u64 nps_req_oflw : 1;
+		u64 ilk_req_oflw : 1;
+		u64 raz_13_63 : 51;
+#endif
+	} s;
+};
+
+/**
+ * struct bmo_ctl2 - BMO Control2 Register
+ * @arb_sel: Determines P2X Arbitration
+ * @ilk_buf_thrsh: Maximum number of buffers that the
+ *    ILK packet flows may consume before ILK XOFF is
+ *    asserted to the POM.
+ * @nps_slc_buf_thrsh: Maximum number of buffers that the
+ *    NPS_SLC packet flow may consume before NPS_SLC XOFF is
+ *    asserted to the POM.
+ * @nps_uns_buf_thrsh: Maximum number of buffers that the
+ *    NPS_UNS packet flow may consume before NPS_UNS XOFF is
+ *    asserted to the POM.
+ * @totl_buf_thrsh: Maximum number of buffers that ILK, NPS_UNS and
+ *    NPS_SLC packet flows may consume before NPS_UNS XOFF, NPS_SLC XOFF and
+ *    ILK_XOFF are all asserted to the POM.
+ */
+union bmo_ctl2 {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 arb_sel : 1;
+		u64 raz_32_62 : 31;
+		u64 ilk_buf_thrsh : 8;
+		u64 nps_slc_buf_thrsh : 8;
+		u64 nps_uns_buf_thrsh : 8;
+		u64 totl_buf_thrsh : 8;
+#else
+		u64 totl_buf_thrsh : 8;
+		u64 nps_uns_buf_thrsh : 8;
+		u64 nps_slc_buf_thrsh : 8;
+		u64 ilk_buf_thrsh : 8;
+		u64 raz_32_62 : 31;
+		u64 arb_sel : 1;
+#endif
+	} s;
+};
+
+/**
+ * struct pom_int_ena_w1s - POM interrupt enable set register
+ * @illegal_intf: Reads or sets enable for POM_INT[ILLEGAL_INTF].
+ * @illegal_dport: Reads or sets enable for POM_INT[ILLEGAL_DPORT].
+ */
+union pom_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz2 : 60;
+		u64 illegal_intf : 1;
+		u64 illegal_dport : 1;
+		u64 raz1 : 1;
+		u64 raz0 : 1;
+#else
+		u64 raz0 : 1;
+		u64 raz1 : 1;
+		u64 illegal_dport : 1;
+		u64 illegal_intf : 1;
+		u64 raz2 : 60;
+#endif
+	} s;
+};
+
+/**
+ * struct lbc_inval_ctl - LBC invalidation control register
+ * @wait_timer: Wait timer for wait state. [WAIT_TIMER] must
+ *   always be written with its reset value.
+ * @cam_inval_start: Software should write [CAM_INVAL_START]=1
+ *   to initiate an LBC cache invalidation. After this, software
+ *   should read LBC_INVAL_STATUS until LBC_INVAL_STATUS[DONE] is set.
+ *   LBC hardware clears [CAM_INVAL_START] before software can
+ *   observe LBC_INVAL_STATUS[DONE] being set
+ */
+union lbc_inval_ctl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz2 : 48;
+		u64 wait_timer : 8;
+		u64 raz1 : 6;
+		u64 cam_inval_start : 1;
+		u64 raz0 : 1;
+#else
+		u64 raz0 : 1;
+		u64 cam_inval_start : 1;
+		u64 raz1 : 6;
+		u64 wait_timer : 8;
+		u64 raz2 : 48;
+#endif
+	} s;
+};
+
+/**
+ * struct lbc_int_ena_w1s - LBC interrupt enable set register
+ * @cam_hard_err: Reads or sets enable for LBC_INT[CAM_HARD_ERR].
+ * @cam_inval_abort: Reads or sets enable for LBC_INT[CAM_INVAL_ABORT].
+ * @over_fetch_err: Reads or sets enable for LBC_INT[OVER_FETCH_ERR].
+ * @cache_line_to_err: Reads or sets enable for
+ *   LBC_INT[CACHE_LINE_TO_ERR].
+ * @cam_soft_err: Reads or sets enable for
+ *   LBC_INT[CAM_SOFT_ERR].
+ * @dma_rd_err: Reads or sets enable for
+ *   LBC_INT[DMA_RD_ERR].
+ */
+union lbc_int_ena_w1s {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_10_63 : 54;
+		u64 cam_hard_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 over_fetch_err : 1;
+		u64 cache_line_to_err : 1;
+		u64 raz_2_5 : 4;
+		u64 cam_soft_err : 1;
+		u64 dma_rd_err : 1;
+#else
+		u64 dma_rd_err : 1;
+		u64 cam_soft_err : 1;
+		u64 raz_2_5 : 4;
+		u64 cache_line_to_err : 1;
+		u64 over_fetch_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 cam_hard_err : 1;
+		u64 raz_10_63 : 54;
+#endif
+	} s;
+};
+
+/**
+ * struct lbc_int - LBC interrupt summary register
+ * @cam_hard_err: indicates a fatal hardware error.
+ *   It requires system reset.
+ *   When [CAM_HARD_ERR] is set, LBC stops logging any new information in
+ *   LBC_POM_MISS_INFO_LOG,
+ *   LBC_POM_MISS_ADDR_LOG,
+ *   LBC_EFL_MISS_INFO_LOG, and
+ *   LBC_EFL_MISS_ADDR_LOG.
+ *   Software should sample them.
+ * @cam_inval_abort: indicates a fatal hardware error.
+ *   System reset is required.
+ * @over_fetch_err: indicates a fatal hardware error
+ *   System reset is required
+ * @cache_line_to_err: is a debug feature.
+ *   This timeout interrupt bit tells the software that
+ *   a cacheline in LBC has non-zero usage and the context
+ *   has not been used for greater than the
+ *   LBC_TO_CNT[TO_CNT] time interval.
+ * @sbe: Memory SBE error. This is recoverable via ECC.
+ *   See LBC_ECC_INT for more details.
+ * @dbe: Memory DBE error. This is fatal and requires a
+ *   system reset.
+ * @pref_dat_len_mismatch_err: Summary bit for context length
+ *   mismatch errors.
+ * @rd_dat_len_mismatch_err: Summary bit for SE read data length
+ *   greater than data prefetch length errors.
+ * @cam_soft_err: is recoverable. Software must complete a
+ *   LBC_INVAL_CTL[CAM_INVAL_START] invalidation sequence and
+ *   then clear [CAM_SOFT_ERR].
+ * @dma_rd_err: A context prefetch read of host memory returned
+ *   a read error.
+ */
+union lbc_int {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_10_63 : 54;
+		u64 cam_hard_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 over_fetch_err : 1;
+		u64 cache_line_to_err : 1;
+		u64 sbe : 1;
+		u64 dbe	: 1;
+		u64 pref_dat_len_mismatch_err : 1;
+		u64 rd_dat_len_mismatch_err : 1;
+		u64 cam_soft_err : 1;
+		u64 dma_rd_err : 1;
+#else
+		u64 dma_rd_err : 1;
+		u64 cam_soft_err : 1;
+		u64 rd_dat_len_mismatch_err : 1;
+		u64 pref_dat_len_mismatch_err : 1;
+		u64 dbe	: 1;
+		u64 sbe	: 1;
+		u64 cache_line_to_err : 1;
+		u64 over_fetch_err : 1;
+		u64 cam_inval_abort : 1;
+		u64 cam_hard_err : 1;
+		u64 raz_10_63 : 54;
+#endif
+	} s;
+};
+
+/**
+ * struct lbc_inval_status: LBC Invalidation status register
+ * @cam_clean_entry_complete_cnt: The number of entries that are
+ *   cleaned up successfully.
+ * @cam_clean_entry_cnt: The number of entries that have the CAM
+ *   inval command issued.
+ * @cam_inval_state: cam invalidation FSM state
+ * @cam_inval_abort: cam invalidation abort
+ * @cam_rst_rdy: lbc_cam reset ready
+ * @done: LBC clears [DONE] when
+ *   LBC_INVAL_CTL[CAM_INVAL_START] is written with a one,
+ *   and sets [DONE] when it completes the invalidation
+ *   sequence.
+ */
+union lbc_inval_status {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz3 : 23;
+		u64 cam_clean_entry_complete_cnt : 9;
+		u64 raz2 : 7;
+		u64 cam_clean_entry_cnt : 9;
+		u64 raz1 : 5;
+		u64 cam_inval_state : 3;
+		u64 raz0 : 5;
+		u64 cam_inval_abort : 1;
+		u64 cam_rst_rdy	: 1;
+		u64 done : 1;
+#else
+		u64 done : 1;
+		u64 cam_rst_rdy : 1;
+		u64 cam_inval_abort : 1;
+		u64 raz0 : 5;
+		u64 cam_inval_state : 3;
+		u64 raz1 : 5;
+		u64 cam_clean_entry_cnt : 9;
+		u64 raz2 : 7;
+		u64 cam_clean_entry_complete_cnt : 9;
+		u64 raz3 : 23;
+#endif
+	} s;
+};
+
+#endif /* __NITROX_CSR_H */

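The unions above pair each CSR with an endianness-guarded bitfield view: read the 64-bit value, edit the named fields, then write value back. An illustrative sketch only (the real helpers live in nitrox_hal.c and may differ; nitrox_read_csr()/nitrox_write_csr() are the accessors added in nitrox_dev.h below):

	/* Illustrative: enable an input instruction ring for 64-byte commands. */
	static void example_enable_pkt_in_ring(struct nitrox_device *ndev, int ring)
	{
		union nps_pkt_in_instr_ctl ctl;
		u64 offset = NPS_PKT_IN_INSTR_CTLX(ring);

		ctl.value = nitrox_read_csr(ndev, offset);
		ctl.s.is64b = 1;	/* 64-byte instruction format */
		ctl.s.enb = 1;		/* enable the ring */
		nitrox_write_csr(ndev, offset, ctl.value);
	}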
+ 179 - 0
drivers/crypto/cavium/nitrox/nitrox_dev.h

@@ -0,0 +1,179 @@
+#ifndef __NITROX_DEV_H
+#define __NITROX_DEV_H
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#define VERSION_LEN 32
+
+struct nitrox_cmdq {
+	/* command queue lock */
+	spinlock_t cmdq_lock;
+	/* response list lock */
+	spinlock_t response_lock;
+	/* backlog list lock */
+	spinlock_t backlog_lock;
+
+	/* request submitted to chip, in progress */
+	struct list_head response_head;
+	/* hw queue full, hold in backlog list */
+	struct list_head backlog_head;
+
+	/* doorbell address */
+	u8 __iomem *dbell_csr_addr;
+	/* base address of the queue */
+	u8 *head;
+
+	struct nitrox_device *ndev;
+	/* flush pending backlog commands */
+	struct work_struct backlog_qflush;
+
+	/* requests posted waiting for completion */
+	atomic_t pending_count;
+	/* requests in backlog queues */
+	atomic_t backlog_count;
+
+	/* command size 32B/64B */
+	u8 instr_size;
+	u8 qno;
+	u32 qsize;
+
+	/* unaligned addresses */
+	u8 *head_unaligned;
+	dma_addr_t dma_unaligned;
+	/* dma address of the base */
+	dma_addr_t dma;
+};
+
+struct nitrox_hw {
+	/* firmware version */
+	char fw_name[VERSION_LEN];
+
+	u16 vendor_id;
+	u16 device_id;
+	u8 revision_id;
+
+	/* CNN55XX cores */
+	u8 se_cores;
+	u8 ae_cores;
+	u8 zip_cores;
+};
+
+#define MAX_MSIX_VECTOR_NAME	20
+/*
+ * vectors for queues (64 AE, 64 SE and 64 ZIP) and
+ * error condition/mailbox.
+ */
+#define MAX_MSIX_VECTORS	192
+
+struct nitrox_msix {
+	struct msix_entry *entries;
+	char **names;
+	DECLARE_BITMAP(irqs, MAX_MSIX_VECTORS);
+	u32 nr_entries;
+};
+
+struct bh_data {
+	/* slc port completion count address */
+	u8 __iomem *completion_cnt_csr_addr;
+
+	struct nitrox_cmdq *cmdq;
+	struct tasklet_struct resp_handler;
+};
+
+struct nitrox_bh {
+	struct bh_data *slc;
+};
+
+/* NITROX-5 driver state */
+#define NITROX_UCODE_LOADED	0
+#define NITROX_READY		1
+
+/* command queue size */
+#define DEFAULT_CMD_QLEN 2048
+/* command timeout in milliseconds */
+#define CMD_TIMEOUT 2000
+
+#define DEV(ndev) ((struct device *)(&(ndev)->pdev->dev))
+#define PF_MODE 0
+
+#define NITROX_CSR_ADDR(ndev, offset) \
+	((ndev)->bar_addr + (offset))
+
+/**
+ * struct nitrox_device - NITROX Device Information.
+ * @list: pointer to linked list of devices
+ * @bar_addr: iomap address
+ * @pdev: PCI device information
+ * @status: NITROX status
+ * @timeout: Request timeout in jiffies
+ * @refcnt: Device usage count
+ * @idx: device index (0..N)
+ * @node: NUMA node id attached
+ * @qlen: Command queue length
+ * @nr_queues: Number of command queues
+ * @ctx_pool: DMA pool for crypto context
+ * @pkt_cmdqs: SE Command queues
+ * @msix: MSI-X information
+ * @bh: post processing work
+ * @hw: hardware information
+ * @debugfs_dir: debugfs directory
+ */
+struct nitrox_device {
+	struct list_head list;
+
+	u8 __iomem *bar_addr;
+	struct pci_dev *pdev;
+
+	unsigned long status;
+	unsigned long timeout;
+	refcount_t refcnt;
+
+	u8 idx;
+	int node;
+	u16 qlen;
+	u16 nr_queues;
+
+	struct dma_pool *ctx_pool;
+	struct nitrox_cmdq *pkt_cmdqs;
+
+	struct nitrox_msix msix;
+	struct nitrox_bh bh;
+
+	struct nitrox_hw hw;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+	struct dentry *debugfs_dir;
+#endif
+};
+
+/**
+ * nitrox_read_csr - Read from device register
+ * @ndev: NITROX device
+ * @offset: offset of the register to read
+ *
+ * Returns: value read
+ */
+static inline u64 nitrox_read_csr(struct nitrox_device *ndev, u64 offset)
+{
+	return readq(ndev->bar_addr + offset);
+}
+
+/**
+ * nitrox_write_csr - Write to device register
+ * @ndev: NITROX device
+ * @offset: offset of the register to write
+ * @value: value to write
+ */
+static inline void nitrox_write_csr(struct nitrox_device *ndev, u64 offset,
+				    u64 value)
+{
+	writeq(value, (ndev->bar_addr + offset));
+}
+
+static inline int nitrox_ready(struct nitrox_device *ndev)
+{
+	return test_bit(NITROX_READY, &ndev->status);
+}
+
+#endif /* __NITROX_DEV_H */
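The per-queue doorbell address in struct nitrox_cmdq is precomputed with NITROX_CSR_ADDR() so that the submission path can ring it with a plain writeq(). A minimal sketch of that use, assuming the usual writeq() I/O accessor; the helper name and instruction-count argument are hypothetical:

/* Sketch: ring the SE input-ring doorbell for a batch of instructions. */
static void ring_pkt_in_doorbell(struct nitrox_cmdq *cmdq, u32 nr_instr)
{
	/* dbell_csr_addr was set from NPS_PKT_IN_INSTR_BAOFF_DBELLX(qno) */
	writeq(nr_instr, cmdq->dbell_csr_addr);
}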

+ 401 - 0
drivers/crypto/cavium/nitrox/nitrox_hal.c

@@ -0,0 +1,401 @@
+#include <linux/delay.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_csr.h"
+
+/**
+ * emu_enable_cores - Enable EMU cluster cores.
+ * @ndev: N5 device
+ */
+static void emu_enable_cores(struct nitrox_device *ndev)
+{
+	union emu_se_enable emu_se;
+	union emu_ae_enable emu_ae;
+	int i;
+
+	/* AE cores 20 per cluster */
+	emu_ae.value = 0;
+	emu_ae.s.enable = 0xfffff;
+
+	/* SE cores 16 per cluster */
+	emu_se.value = 0;
+	emu_se.s.enable = 0xffff;
+
+	/* enable per cluster cores */
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		nitrox_write_csr(ndev, EMU_AE_ENABLEX(i), emu_ae.value);
+		nitrox_write_csr(ndev, EMU_SE_ENABLEX(i), emu_se.value);
+	}
+}
+
+/**
+ * nitrox_config_emu_unit - configure EMU unit.
+ * @ndev: N5 device
+ */
+void nitrox_config_emu_unit(struct nitrox_device *ndev)
+{
+	union emu_wd_int_ena_w1s emu_wd_int;
+	union emu_ge_int_ena_w1s emu_ge_int;
+	u64 offset;
+	int i;
+
+	/* enable cores */
+	emu_enable_cores(ndev);
+
+	/* enable general error and watchdog interrupts */
+	emu_ge_int.value = 0;
+	emu_ge_int.s.se_ge = 0xffff;
+	emu_ge_int.s.ae_ge = 0xfffff;
+	emu_wd_int.value = 0;
+	emu_wd_int.s.se_wd = 1;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		offset = EMU_WD_INT_ENA_W1SX(i);
+		nitrox_write_csr(ndev, offset, emu_wd_int.value);
+		offset = EMU_GE_INT_ENA_W1SX(i);
+		nitrox_write_csr(ndev, offset, emu_ge_int.value);
+	}
+}
+
+static void reset_pkt_input_ring(struct nitrox_device *ndev, int ring)
+{
+	union nps_pkt_in_instr_ctl pkt_in_ctl;
+	union nps_pkt_in_instr_baoff_dbell pkt_in_dbell;
+	union nps_pkt_in_done_cnts pkt_in_cnts;
+	u64 offset;
+
+	offset = NPS_PKT_IN_INSTR_CTLX(ring);
+	/* disable the ring */
+	pkt_in_ctl.value = nitrox_read_csr(ndev, offset);
+	pkt_in_ctl.s.enb = 0;
+	nitrox_write_csr(ndev, offset, pkt_in_ctl.value);
+	usleep_range(100, 150);
+
+	/* wait to clear [ENB] */
+	do {
+		pkt_in_ctl.value = nitrox_read_csr(ndev, offset);
+	} while (pkt_in_ctl.s.enb);
+
+	/* clear the doorbell counts */
+	offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(ring);
+	pkt_in_dbell.value = 0;
+	pkt_in_dbell.s.dbell = 0xffffffff;
+	nitrox_write_csr(ndev, offset, pkt_in_dbell.value);
+
+	/* clear done counts */
+	offset = NPS_PKT_IN_DONE_CNTSX(ring);
+	pkt_in_cnts.value = nitrox_read_csr(ndev, offset);
+	nitrox_write_csr(ndev, offset, pkt_in_cnts.value);
+	usleep_range(50, 100);
+}
+
+void enable_pkt_input_ring(struct nitrox_device *ndev, int ring)
+{
+	union nps_pkt_in_instr_ctl pkt_in_ctl;
+	u64 offset;
+
+	/* 64-byte instruction size */
+	offset = NPS_PKT_IN_INSTR_CTLX(ring);
+	pkt_in_ctl.value = nitrox_read_csr(ndev, offset);
+	pkt_in_ctl.s.is64b = 1;
+	pkt_in_ctl.s.enb = 1;
+	nitrox_write_csr(ndev, offset, pkt_in_ctl.value);
+
+	/* wait for set [ENB] */
+	do {
+		pkt_in_ctl.value = nitrox_read_csr(ndev, offset);
+	} while (!pkt_in_ctl.s.enb);
+}
+
+/**
+ * nitrox_config_pkt_input_rings - configure Packet Input Rings
+ * @ndev: N5 device
+ */
+void nitrox_config_pkt_input_rings(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		struct nitrox_cmdq *cmdq = &ndev->pkt_cmdqs[i];
+		union nps_pkt_in_instr_rsize pkt_in_rsize;
+		u64 offset;
+
+		reset_pkt_input_ring(ndev, i);
+
+		/* configure ring base address 16-byte aligned,
+		 * size and interrupt threshold.
+		 */
+		offset = NPS_PKT_IN_INSTR_BADDRX(i);
+		nitrox_write_csr(ndev, offset, cmdq->dma);
+
+		/* configure ring size */
+		offset = NPS_PKT_IN_INSTR_RSIZEX(i);
+		pkt_in_rsize.value = 0;
+		pkt_in_rsize.s.rsize = ndev->qlen;
+		nitrox_write_csr(ndev, offset, pkt_in_rsize.value);
+
+		/* set high threshold for pkt input ring interrupts */
+		offset = NPS_PKT_IN_INT_LEVELSX(i);
+		nitrox_write_csr(ndev, offset, 0xffffffff);
+
+		enable_pkt_input_ring(ndev, i);
+	}
+}
+
+static void reset_pkt_solicit_port(struct nitrox_device *ndev, int port)
+{
+	union nps_pkt_slc_ctl pkt_slc_ctl;
+	union nps_pkt_slc_cnts pkt_slc_cnts;
+	u64 offset;
+
+	/* disable slc port */
+	offset = NPS_PKT_SLC_CTLX(port);
+	pkt_slc_ctl.value = nitrox_read_csr(ndev, offset);
+	pkt_slc_ctl.s.enb = 0;
+	nitrox_write_csr(ndev, offset, pkt_slc_ctl.value);
+	usleep_range(100, 150);
+
+	/* wait to clear [ENB] */
+	do {
+		pkt_slc_ctl.value = nitrox_read_csr(ndev, offset);
+	} while (pkt_slc_ctl.s.enb);
+
+	/* clear slc counters */
+	offset = NPS_PKT_SLC_CNTSX(port);
+	pkt_slc_cnts.value = nitrox_read_csr(ndev, offset);
+	nitrox_write_csr(ndev, offset, pkt_slc_cnts.value);
+	usleep_range(50, 100);
+}
+
+void enable_pkt_solicit_port(struct nitrox_device *ndev, int port)
+{
+	union nps_pkt_slc_ctl pkt_slc_ctl;
+	u64 offset;
+
+	offset = NPS_PKT_SLC_CTLX(port);
+	pkt_slc_ctl.value = 0;
+	pkt_slc_ctl.s.enb = 1;
+
+	/*
+	 * 8 trailing 0x00 bytes will be added
+	 * to the end of the outgoing packet.
+	 */
+	pkt_slc_ctl.s.z = 1;
+	/* enable response header */
+	pkt_slc_ctl.s.rh = 1;
+	nitrox_write_csr(ndev, offset, pkt_slc_ctl.value);
+
+	/* wait to set [ENB] */
+	do {
+		pkt_slc_ctl.value = nitrox_read_csr(ndev, offset);
+	} while (!pkt_slc_ctl.s.enb);
+}
+
+static void config_single_pkt_solicit_port(struct nitrox_device *ndev,
+					   int port)
+{
+	union nps_pkt_slc_int_levels pkt_slc_int;
+	u64 offset;
+
+	reset_pkt_solicit_port(ndev, port);
+
+	offset = NPS_PKT_SLC_INT_LEVELSX(port);
+	pkt_slc_int.value = 0;
+	/* time interrupt threshold */
+	pkt_slc_int.s.timet = 0x3fffff;
+	nitrox_write_csr(ndev, offset, pkt_slc_int.value);
+
+	enable_pkt_solicit_port(ndev, port);
+}
+
+void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < ndev->nr_queues; i++)
+		config_single_pkt_solicit_port(ndev, i);
+}
+
+/**
+ * enable_nps_interrupts - enable NPS interrupts
+ * @ndev: N5 device.
+ *
+ * This includes NPS core, packet in and slc interrupts.
+ */
+static void enable_nps_interrupts(struct nitrox_device *ndev)
+{
+	union nps_core_int_ena_w1s core_int;
+
+	/* NPS core interrupts */
+	core_int.value = 0;
+	core_int.s.host_wr_err = 1;
+	core_int.s.host_wr_timeout = 1;
+	core_int.s.exec_wr_timeout = 1;
+	core_int.s.npco_dma_malform = 1;
+	core_int.s.host_nps_wr_err = 1;
+	nitrox_write_csr(ndev, NPS_CORE_INT_ENA_W1S, core_int.value);
+
+	/* NPS packet in ring interrupts */
+	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_IN_ERR_TYPE_ENA_W1S, (~0ULL));
+	/* NPS packet slc port interrupts */
+	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_SLC_ERR_TYPE_ENA_W1S, (~0ULL));
+}
+
+void nitrox_config_nps_unit(struct nitrox_device *ndev)
+{
+	union nps_core_gbl_vfcfg core_gbl_vfcfg;
+
+	/* endian control information */
+	nitrox_write_csr(ndev, NPS_CORE_CONTROL, 1ULL);
+
+	/* disable ILK interface */
+	core_gbl_vfcfg.value = 0;
+	core_gbl_vfcfg.s.ilk_disable = 1;
+	core_gbl_vfcfg.s.cfg = PF_MODE;
+	nitrox_write_csr(ndev, NPS_CORE_GBL_VFCFG, core_gbl_vfcfg.value);
+	/* config input and solicit ports */
+	nitrox_config_pkt_input_rings(ndev);
+	nitrox_config_pkt_solicit_ports(ndev);
+
+	/* enable interrupts */
+	enable_nps_interrupts(ndev);
+}
+
+void nitrox_config_pom_unit(struct nitrox_device *ndev)
+{
+	union pom_int_ena_w1s pom_int;
+	int i;
+
+	/* enable pom interrupts */
+	pom_int.value = 0;
+	pom_int.s.illegal_dport = 1;
+	nitrox_write_csr(ndev, POM_INT_ENA_W1S, pom_int.value);
+
+	/* enable perf counters */
+	for (i = 0; i < ndev->hw.se_cores; i++)
+		nitrox_write_csr(ndev, POM_PERF_CTL, BIT_ULL(i));
+}
+
+/**
+ * nitrox_config_rand_unit - enable N5 random number unit
+ * @ndev: N5 device
+ */
+void nitrox_config_rand_unit(struct nitrox_device *ndev)
+{
+	union efl_rnm_ctl_status efl_rnm_ctl;
+	u64 offset;
+
+	offset = EFL_RNM_CTL_STATUS;
+	efl_rnm_ctl.value = nitrox_read_csr(ndev, offset);
+	efl_rnm_ctl.s.ent_en = 1;
+	efl_rnm_ctl.s.rng_en = 1;
+	nitrox_write_csr(ndev, offset, efl_rnm_ctl.value);
+}
+
+void nitrox_config_efl_unit(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		union efl_core_int_ena_w1s efl_core_int;
+		u64 offset;
+
+		/* EFL core interrupts */
+		offset = EFL_CORE_INT_ENA_W1SX(i);
+		efl_core_int.value = 0;
+		efl_core_int.s.len_ovr = 1;
+		efl_core_int.s.d_left = 1;
+		efl_core_int.s.epci_decode_err = 1;
+		nitrox_write_csr(ndev, offset, efl_core_int.value);
+
+		offset = EFL_CORE_VF_ERR_INT0_ENA_W1SX(i);
+		nitrox_write_csr(ndev, offset, (~0ULL));
+		offset = EFL_CORE_VF_ERR_INT1_ENA_W1SX(i);
+		nitrox_write_csr(ndev, offset, (~0ULL));
+	}
+}
+
+void nitrox_config_bmi_unit(struct nitrox_device *ndev)
+{
+	union bmi_ctl bmi_ctl;
+	union bmi_int_ena_w1s bmi_int_ena;
+	u64 offset;
+
+	/* no threshold limits for PCIe */
+	offset = BMI_CTL;
+	bmi_ctl.value = nitrox_read_csr(ndev, offset);
+	bmi_ctl.s.max_pkt_len = 0xff;
+	bmi_ctl.s.nps_free_thrsh = 0xff;
+	bmi_ctl.s.nps_hdrq_thrsh = 0x7a;
+	nitrox_write_csr(ndev, offset, bmi_ctl.value);
+
+	/* enable interrupts */
+	offset = BMI_INT_ENA_W1S;
+	bmi_int_ena.value = 0;
+	bmi_int_ena.s.max_len_err_nps = 1;
+	bmi_int_ena.s.pkt_rcv_err_nps = 1;
+	bmi_int_ena.s.fpf_undrrn = 1;
+	nitrox_write_csr(ndev, offset, bmi_int_ena.value);
+}
+
+void nitrox_config_bmo_unit(struct nitrox_device *ndev)
+{
+	union bmo_ctl2 bmo_ctl2;
+	u64 offset;
+
+	/* no threshold limits for PCIe */
+	offset = BMO_CTL2;
+	bmo_ctl2.value = nitrox_read_csr(ndev, offset);
+	bmo_ctl2.s.nps_slc_buf_thrsh = 0xff;
+	nitrox_write_csr(ndev, offset, bmo_ctl2.value);
+}
+
+void invalidate_lbc(struct nitrox_device *ndev)
+{
+	union lbc_inval_ctl lbc_ctl;
+	union lbc_inval_status lbc_stat;
+	u64 offset;
+
+	/* invalidate LBC */
+	offset = LBC_INVAL_CTL;
+	lbc_ctl.value = nitrox_read_csr(ndev, offset);
+	lbc_ctl.s.cam_inval_start = 1;
+	nitrox_write_csr(ndev, offset, lbc_ctl.value);
+
+	offset = LBC_INVAL_STATUS;
+
+	do {
+		lbc_stat.value = nitrox_read_csr(ndev, offset);
+	} while (!lbc_stat.s.done);
+}
+
+void nitrox_config_lbc_unit(struct nitrox_device *ndev)
+{
+	union lbc_int_ena_w1s lbc_int_ena;
+	u64 offset;
+
+	invalidate_lbc(ndev);
+
+	/* enable interrupts */
+	offset = LBC_INT_ENA_W1S;
+	lbc_int_ena.value = 0;
+	lbc_int_ena.s.dma_rd_err = 1;
+	lbc_int_ena.s.over_fetch_err = 1;
+	lbc_int_ena.s.cam_inval_abort = 1;
+	lbc_int_ena.s.cam_hard_err = 1;
+	nitrox_write_csr(ndev, offset, lbc_int_ena.value);
+
+	offset = LBC_PLM_VF1_64_INT_ENA_W1S;
+	nitrox_write_csr(ndev, offset, (~0ULL));
+	offset = LBC_PLM_VF65_128_INT_ENA_W1S;
+	nitrox_write_csr(ndev, offset, (~0ULL));
+
+	offset = LBC_ELM_VF1_64_INT_ENA_W1S;
+	nitrox_write_csr(ndev, offset, (~0ULL));
+	offset = LBC_ELM_VF65_128_INT_ENA_W1S;
+	nitrox_write_csr(ndev, offset, (~0ULL));
+}
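The reset and enable helpers above poll [ENB] in unbounded loops. A bounded variant of the same wait is sketched below, with a hypothetical retry limit and helper name; it is not part of the patch:

/* Sketch: wait for NPS_PKT_IN_INSTR_CTLX(ring)[ENB] to clear, with a cap. */
static int wait_pkt_input_ring_disabled(struct nitrox_device *ndev, int ring)
{
	union nps_pkt_in_instr_ctl pkt_in_ctl;
	int retries = 100;	/* hypothetical bound, roughly 10-15 ms total */

	do {
		pkt_in_ctl.value = nitrox_read_csr(ndev,
						   NPS_PKT_IN_INSTR_CTLX(ring));
		if (!pkt_in_ctl.s.enb)
			return 0;
		usleep_range(100, 150);
	} while (--retries);

	return -ETIMEDOUT;
}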

+ 467 - 0
drivers/crypto/cavium/nitrox/nitrox_isr.c

@@ -0,0 +1,467 @@
+#include <linux/pci.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_csr.h"
+#include "nitrox_common.h"
+
+#define NR_RING_VECTORS 3
+#define NPS_CORE_INT_ACTIVE_ENTRY 192
+
+/**
+ * nps_pkt_slc_isr - IRQ handler for NPS solicit port
+ * @irq: irq number
+ * @data: argument
+ */
+static irqreturn_t nps_pkt_slc_isr(int irq, void *data)
+{
+	struct bh_data *slc = data;
+	union nps_pkt_slc_cnts pkt_slc_cnts;
+
+	pkt_slc_cnts.value = readq(slc->completion_cnt_csr_addr);
+	/* New packet on SLC output port */
+	if (pkt_slc_cnts.s.slc_int)
+		tasklet_hi_schedule(&slc->resp_handler);
+
+	return IRQ_HANDLED;
+}
+
+static void clear_nps_core_err_intr(struct nitrox_device *ndev)
+{
+	u64 value;
+
+	/* Write 1 to clear */
+	value = nitrox_read_csr(ndev, NPS_CORE_INT);
+	nitrox_write_csr(ndev, NPS_CORE_INT, value);
+
+	dev_err_ratelimited(DEV(ndev), "NPS_CORE_INT  0x%016llx\n", value);
+}
+
+static void clear_nps_pkt_err_intr(struct nitrox_device *ndev)
+{
+	union nps_pkt_int pkt_int;
+	unsigned long value, offset;
+	int i;
+
+	pkt_int.value = nitrox_read_csr(ndev, NPS_PKT_INT);
+	dev_err_ratelimited(DEV(ndev), "NPS_PKT_INT  0x%016llx\n",
+			    pkt_int.value);
+
+	if (pkt_int.s.slc_err) {
+		offset = NPS_PKT_SLC_ERR_TYPE;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_SLC_ERR_TYPE  0x%016lx\n", value);
+
+		offset = NPS_PKT_SLC_RERR_LO;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		/* enable the solicit ports */
+		for_each_set_bit(i, &value, BITS_PER_LONG)
+			enable_pkt_solicit_port(ndev, i);
+
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_SLC_RERR_LO  0x%016lx\n", value);
+
+		offset = NPS_PKT_SLC_RERR_HI;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_SLC_RERR_HI  0x%016lx\n", value);
+	}
+
+	if (pkt_int.s.in_err) {
+		offset = NPS_PKT_IN_ERR_TYPE;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_IN_ERR_TYPE  0x%016lx\n", value);
+		offset = NPS_PKT_IN_RERR_LO;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		/* enable the input ring */
+		for_each_set_bit(i, &value, BITS_PER_LONG)
+			enable_pkt_input_ring(ndev, i);
+
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_IN_RERR_LO  0x%016lx\n", value);
+
+		offset = NPS_PKT_IN_RERR_HI;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		dev_err_ratelimited(DEV(ndev),
+				    "NPS_PKT_IN_RERR_HI  0x%016lx\n", value);
+	}
+}
+
+static void clear_pom_err_intr(struct nitrox_device *ndev)
+{
+	u64 value;
+
+	value = nitrox_read_csr(ndev, POM_INT);
+	nitrox_write_csr(ndev, POM_INT, value);
+	dev_err_ratelimited(DEV(ndev), "POM_INT  0x%016llx\n", value);
+}
+
+static void clear_pem_err_intr(struct nitrox_device *ndev)
+{
+	u64 value;
+
+	value = nitrox_read_csr(ndev, PEM0_INT);
+	nitrox_write_csr(ndev, PEM0_INT, value);
+	dev_err_ratelimited(DEV(ndev), "PEM(0)_INT  0x%016llx\n", value);
+}
+
+static void clear_lbc_err_intr(struct nitrox_device *ndev)
+{
+	union lbc_int lbc_int;
+	u64 value, offset;
+	int i;
+
+	lbc_int.value = nitrox_read_csr(ndev, LBC_INT);
+	dev_err_ratelimited(DEV(ndev), "LBC_INT  0x%016llx\n", lbc_int.value);
+
+	if (lbc_int.s.dma_rd_err) {
+		for (i = 0; i < NR_CLUSTERS; i++) {
+			offset = EFL_CORE_VF_ERR_INT0X(i);
+			value = nitrox_read_csr(ndev, offset);
+			nitrox_write_csr(ndev, offset, value);
+			offset = EFL_CORE_VF_ERR_INT1X(i);
+			value = nitrox_read_csr(ndev, offset);
+			nitrox_write_csr(ndev, offset, value);
+		}
+	}
+
+	if (lbc_int.s.cam_soft_err) {
+		dev_err_ratelimited(DEV(ndev), "CAM_SOFT_ERR, invalidating LBC\n");
+		invalidate_lbc(ndev);
+	}
+
+	if (lbc_int.s.pref_dat_len_mismatch_err) {
+		offset = LBC_PLM_VF1_64_INT;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		offset = LBC_PLM_VF65_128_INT;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+	}
+
+	if (lbc_int.s.rd_dat_len_mismatch_err) {
+		offset = LBC_ELM_VF1_64_INT;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+		offset = LBC_ELM_VF65_128_INT;
+		value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, value);
+	}
+	nitrox_write_csr(ndev, LBC_INT, lbc_int.value);
+}
+
+static void clear_efl_err_intr(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		union efl_core_int core_int;
+		u64 value, offset;
+
+		offset = EFL_CORE_INTX(i);
+		core_int.value = nitrox_read_csr(ndev, offset);
+		nitrox_write_csr(ndev, offset, core_int.value);
+		dev_err_ratelimited(DEV(ndev), "EFL_CORE(%d)_INT  0x%016llx\n",
+				    i, core_int.value);
+		if (core_int.s.se_err) {
+			offset = EFL_CORE_SE_ERR_INTX(i);
+			value = nitrox_read_csr(ndev, offset);
+			nitrox_write_csr(ndev, offset, value);
+		}
+	}
+}
+
+static void clear_bmi_err_intr(struct nitrox_device *ndev)
+{
+	u64 value;
+
+	value = nitrox_read_csr(ndev, BMI_INT);
+	nitrox_write_csr(ndev, BMI_INT, value);
+	dev_err_ratelimited(DEV(ndev), "BMI_INT  0x%016llx\n", value);
+}
+
+/**
+ * clear_nps_core_int_active - clear NPS_CORE_INT_ACTIVE interrupts
+ * @ndev: NITROX device
+ */
+static void clear_nps_core_int_active(struct nitrox_device *ndev)
+{
+	union nps_core_int_active core_int_active;
+
+	core_int_active.value = nitrox_read_csr(ndev, NPS_CORE_INT_ACTIVE);
+
+	if (core_int_active.s.nps_core)
+		clear_nps_core_err_intr(ndev);
+
+	if (core_int_active.s.nps_pkt)
+		clear_nps_pkt_err_intr(ndev);
+
+	if (core_int_active.s.pom)
+		clear_pom_err_intr(ndev);
+
+	if (core_int_active.s.pem)
+		clear_pem_err_intr(ndev);
+
+	if (core_int_active.s.lbc)
+		clear_lbc_err_intr(ndev);
+
+	if (core_int_active.s.efl)
+		clear_efl_err_intr(ndev);
+
+	if (core_int_active.s.bmi)
+		clear_bmi_err_intr(ndev);
+
+	/* if more work is pending, set [RESEND] to trigger the ISR again */
+	core_int_active.s.resend = 1;
+	nitrox_write_csr(ndev, NPS_CORE_INT_ACTIVE, core_int_active.value);
+}
+
+static irqreturn_t nps_core_int_isr(int irq, void *data)
+{
+	struct nitrox_device *ndev = data;
+
+	clear_nps_core_int_active(ndev);
+
+	return IRQ_HANDLED;
+}
+
+static int nitrox_enable_msix(struct nitrox_device *ndev)
+{
+	struct msix_entry *entries;
+	char **names;
+	int i, nr_entries, ret;
+
+	/*
+	 * PF MSI-X vectors
+	 *
+	 * Entry 0: NPS PKT ring 0
+	 * Entry 1: AQMQ ring 0
+	 * Entry 2: ZQM ring 0
+	 * Entry 3: NPS PKT ring 1
+	 * Entry 4: AQMQ ring 1
+	 * Entry 5: ZQM ring 1
+	 * ....
+	 * Entry 192: NPS_CORE_INT_ACTIVE
+	 */
+	nr_entries = (ndev->nr_queues * NR_RING_VECTORS) + 1;
+	entries = kzalloc_node(nr_entries * sizeof(struct msix_entry),
+			       GFP_KERNEL, ndev->node);
+	if (!entries)
+		return -ENOMEM;
+
+	names = kcalloc(nr_entries, sizeof(char *), GFP_KERNEL);
+	if (!names) {
+		kfree(entries);
+		return -ENOMEM;
+	}
+
+	/* fill entries */
+	for (i = 0; i < (nr_entries - 1); i++)
+		entries[i].entry = i;
+
+	entries[i].entry = NPS_CORE_INT_ACTIVE_ENTRY;
+
+	for (i = 0; i < nr_entries; i++) {
+		*(names + i) = kzalloc(MAX_MSIX_VECTOR_NAME, GFP_KERNEL);
+		if (!(*(names + i))) {
+			ret = -ENOMEM;
+			goto msix_fail;
+		}
+	}
+	ndev->msix.entries = entries;
+	ndev->msix.names = names;
+	ndev->msix.nr_entries = nr_entries;
+
+	ret = pci_enable_msix_exact(ndev->pdev, ndev->msix.entries,
+				    ndev->msix.nr_entries);
+	if (ret) {
+		dev_err(&ndev->pdev->dev, "Failed to enable MSI-X IRQ(s) %d\n",
+			ret);
+		goto msix_fail;
+	}
+	return 0;
+
+msix_fail:
+	for (i = 0; i < nr_entries; i++)
+		kfree(*(names + i));
+
+	kfree(entries);
+	kfree(names);
+	return ret;
+}
+
+static void nitrox_cleanup_pkt_slc_bh(struct nitrox_device *ndev)
+{
+	int i;
+
+	if (!ndev->bh.slc)
+		return;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		struct bh_data *bh = &ndev->bh.slc[i];
+
+		tasklet_disable(&bh->resp_handler);
+		tasklet_kill(&bh->resp_handler);
+	}
+	kfree(ndev->bh.slc);
+	ndev->bh.slc = NULL;
+}
+
+static int nitrox_setup_pkt_slc_bh(struct nitrox_device *ndev)
+{
+	u32 size;
+	int i;
+
+	size = ndev->nr_queues * sizeof(struct bh_data);
+	ndev->bh.slc = kzalloc(size, GFP_KERNEL);
+	if (!ndev->bh.slc)
+		return -ENOMEM;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		struct bh_data *bh = &ndev->bh.slc[i];
+		u64 offset;
+
+		offset = NPS_PKT_SLC_CNTSX(i);
+		/* pre calculate completion count address */
+		bh->completion_cnt_csr_addr = NITROX_CSR_ADDR(ndev, offset);
+		bh->cmdq = &ndev->pkt_cmdqs[i];
+
+		tasklet_init(&bh->resp_handler, pkt_slc_resp_handler,
+			     (unsigned long)bh);
+	}
+
+	return 0;
+}
+
+static int nitrox_request_irqs(struct nitrox_device *ndev)
+{
+	struct pci_dev *pdev = ndev->pdev;
+	struct msix_entry *msix_ent = ndev->msix.entries;
+	int nr_ring_vectors, i = 0, ring, cpu, ret;
+	char *name;
+
+	/*
+	 * PF MSI-X vectors
+	 *
+	 * Entry 0: NPS PKT ring 0
+	 * Entry 1: AQMQ ring 0
+	 * Entry 2: ZQM ring 0
+	 * Entry 3: NPS PKT ring 1
+	 * ....
+	 * Entry 192: NPS_CORE_INT_ACTIVE
+	 */
+	nr_ring_vectors = ndev->nr_queues * NR_RING_VECTORS;
+
+	/* request irq for pkt ring/ports only */
+	while (i < nr_ring_vectors) {
+		name = *(ndev->msix.names + i);
+		ring = (i / NR_RING_VECTORS);
+		snprintf(name, MAX_MSIX_VECTOR_NAME, "n5(%d)-slc-ring%d",
+			 ndev->idx, ring);
+
+		ret = request_irq(msix_ent[i].vector, nps_pkt_slc_isr, 0,
+				  name, &ndev->bh.slc[ring]);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to get irq %d for %s\n",
+				msix_ent[i].vector, name);
+			return ret;
+		}
+		cpu = ring % num_online_cpus();
+		irq_set_affinity_hint(msix_ent[i].vector, get_cpu_mask(cpu));
+
+		set_bit(i, ndev->msix.irqs);
+		i += NR_RING_VECTORS;
+	}
+
+	/* Request IRQ for NPS_CORE_INT_ACTIVE */
+	name = *(ndev->msix.names + i);
+	snprintf(name, MAX_MSIX_VECTOR_NAME, "n5(%d)-nps-core-int", ndev->idx);
+	ret = request_irq(msix_ent[i].vector, nps_core_int_isr, 0, name, ndev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to get irq %d for %s\n",
+			msix_ent[i].vector, name);
+		return ret;
+	}
+	set_bit(i, ndev->msix.irqs);
+
+	return 0;
+}
+
+static void nitrox_disable_msix(struct nitrox_device *ndev)
+{
+	struct msix_entry *msix_ent = ndev->msix.entries;
+	char **names = ndev->msix.names;
+	int i = 0, ring, nr_ring_vectors;
+
+	nr_ring_vectors = ndev->msix.nr_entries - 1;
+
+	/* clear pkt ring irqs */
+	while (i < nr_ring_vectors) {
+		if (test_and_clear_bit(i, ndev->msix.irqs)) {
+			ring = (i / NR_RING_VECTORS);
+			irq_set_affinity_hint(msix_ent[i].vector, NULL);
+			free_irq(msix_ent[i].vector, &ndev->bh.slc[ring]);
+		}
+		i += NR_RING_VECTORS;
+	}
+	irq_set_affinity_hint(msix_ent[i].vector, NULL);
+	free_irq(msix_ent[i].vector, ndev);
+	clear_bit(i, ndev->msix.irqs);
+
+	kfree(ndev->msix.entries);
+	for (i = 0; i < ndev->msix.nr_entries; i++)
+		kfree(*(names + i));
+
+	kfree(names);
+	pci_disable_msix(ndev->pdev);
+}
+
+/**
+ * nitrox_pf_cleanup_isr - Cleanup PF MSI-X vectors and IRQs
+ * @ndev: NITROX device
+ */
+void nitrox_pf_cleanup_isr(struct nitrox_device *ndev)
+{
+	nitrox_disable_msix(ndev);
+	nitrox_cleanup_pkt_slc_bh(ndev);
+}
+
+/**
+ * nitrox_init_isr - Initialize PF MSI-X vectors and IRQ
+ * @ndev: NITROX device
+ *
+ * Return: 0 on success, a negative value on failure.
+ */
+int nitrox_pf_init_isr(struct nitrox_device *ndev)
+{
+	int err;
+
+	err = nitrox_setup_pkt_slc_bh(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_enable_msix(ndev);
+	if (err)
+		goto msix_fail;
+
+	err = nitrox_request_irqs(ndev);
+	if (err)
+		goto irq_fail;
+
+	return 0;
+
+irq_fail:
+	nitrox_disable_msix(ndev);
+msix_fail:
+	nitrox_cleanup_pkt_slc_bh(ndev);
+	return err;
+}
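Given the vector layout described in nitrox_enable_msix() and nitrox_request_irqs() (NR_RING_VECTORS consecutive entries per ring, ordered NPS PKT, AQMQ, ZQM, with the NPS_CORE_INT_ACTIVE entry last), the MSI-X entry index of a ring's packet vector reduces to a small calculation. A sketch with a hypothetical helper name:

/* Sketch: MSI-X entry index of the NPS PKT vector for a given ring. */
static inline int pkt_slc_msix_entry(int ring)
{
	return ring * NR_RING_VECTORS;	/* entries 0, 3, 6, ... */
}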

+ 210 - 0
drivers/crypto/cavium/nitrox/nitrox_lib.c

@@ -0,0 +1,210 @@
+#include <linux/cpumask.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci_regs.h>
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_common.h"
+#include "nitrox_req.h"
+#include "nitrox_csr.h"
+
+#define CRYPTO_CTX_SIZE	256
+
+/* command queue alignments */
+#define PKT_IN_ALIGN	16
+
+static int cmdq_common_init(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+	u32 qsize;
+
+	qsize = (ndev->qlen) * cmdq->instr_size;
+	cmdq->head_unaligned = dma_zalloc_coherent(DEV(ndev),
+						   (qsize + PKT_IN_ALIGN),
+						   &cmdq->dma_unaligned,
+						   GFP_KERNEL);
+	if (!cmdq->head_unaligned)
+		return -ENOMEM;
+
+	cmdq->head = PTR_ALIGN(cmdq->head_unaligned, PKT_IN_ALIGN);
+	cmdq->dma = PTR_ALIGN(cmdq->dma_unaligned, PKT_IN_ALIGN);
+	cmdq->qsize = (qsize + PKT_IN_ALIGN);
+
+	spin_lock_init(&cmdq->response_lock);
+	spin_lock_init(&cmdq->cmdq_lock);
+	spin_lock_init(&cmdq->backlog_lock);
+
+	INIT_LIST_HEAD(&cmdq->response_head);
+	INIT_LIST_HEAD(&cmdq->backlog_head);
+	INIT_WORK(&cmdq->backlog_qflush, backlog_qflush_work);
+
+	atomic_set(&cmdq->pending_count, 0);
+	atomic_set(&cmdq->backlog_count, 0);
+	return 0;
+}
+
+static void cmdq_common_cleanup(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+
+	cancel_work_sync(&cmdq->backlog_qflush);
+
+	dma_free_coherent(DEV(ndev), cmdq->qsize,
+			  cmdq->head_unaligned, cmdq->dma_unaligned);
+
+	atomic_set(&cmdq->pending_count, 0);
+	atomic_set(&cmdq->backlog_count, 0);
+
+	cmdq->dbell_csr_addr = NULL;
+	cmdq->head = NULL;
+	cmdq->dma = 0;
+	cmdq->qsize = 0;
+	cmdq->instr_size = 0;
+}
+
+static void nitrox_cleanup_pkt_cmdqs(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		struct nitrox_cmdq *cmdq = &ndev->pkt_cmdqs[i];
+
+		cmdq_common_cleanup(cmdq);
+	}
+	kfree(ndev->pkt_cmdqs);
+	ndev->pkt_cmdqs = NULL;
+}
+
+static int nitrox_init_pkt_cmdqs(struct nitrox_device *ndev)
+{
+	int i, err, size;
+
+	size = ndev->nr_queues * sizeof(struct nitrox_cmdq);
+	ndev->pkt_cmdqs = kzalloc(size, GFP_KERNEL);
+	if (!ndev->pkt_cmdqs)
+		return -ENOMEM;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		struct nitrox_cmdq *cmdq;
+		u64 offset;
+
+		cmdq = &ndev->pkt_cmdqs[i];
+		cmdq->ndev = ndev;
+		cmdq->qno = i;
+		cmdq->instr_size = sizeof(struct nps_pkt_instr);
+
+		offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(i);
+		/* SE ring doorbell address for this queue */
+		cmdq->dbell_csr_addr = NITROX_CSR_ADDR(ndev, offset);
+
+		err = cmdq_common_init(cmdq);
+		if (err)
+			goto pkt_cmdq_fail;
+	}
+	return 0;
+
+pkt_cmdq_fail:
+	nitrox_cleanup_pkt_cmdqs(ndev);
+	return err;
+}
+
+static int create_crypto_dma_pool(struct nitrox_device *ndev)
+{
+	size_t size;
+
+	/* Crypto context pool, 16 byte aligned */
+	size = CRYPTO_CTX_SIZE + sizeof(struct ctx_hdr);
+	ndev->ctx_pool = dma_pool_create("crypto-context",
+					 DEV(ndev), size, 16, 0);
+	if (!ndev->ctx_pool)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void destroy_crypto_dma_pool(struct nitrox_device *ndev)
+{
+	if (!ndev->ctx_pool)
+		return;
+
+	dma_pool_destroy(ndev->ctx_pool);
+	ndev->ctx_pool = NULL;
+}
+
+/*
+ * crypto_alloc_context - Allocate crypto context from pool
+ * @ndev: NITROX Device
+ */
+void *crypto_alloc_context(struct nitrox_device *ndev)
+{
+	struct ctx_hdr *ctx;
+	void *vaddr;
+	dma_addr_t dma;
+
+	vaddr = dma_pool_alloc(ndev->ctx_pool, (GFP_ATOMIC | __GFP_ZERO), &dma);
+	if (!vaddr)
+		return NULL;
+
+	/* fill meta data */
+	ctx = vaddr;
+	ctx->pool = ndev->ctx_pool;
+	ctx->dma = dma;
+	ctx->ctx_dma = dma + sizeof(struct ctx_hdr);
+
+	return ((u8 *)vaddr + sizeof(struct ctx_hdr));
+}
+
+/**
+ * crypto_free_context - Free crypto context to pool
+ * @ctx: context to free
+ */
+void crypto_free_context(void *ctx)
+{
+	struct ctx_hdr *ctxp;
+
+	if (!ctx)
+		return;
+
+	ctxp = (struct ctx_hdr *)((u8 *)ctx - sizeof(struct ctx_hdr));
+	dma_pool_free(ctxp->pool, ctxp, ctxp->dma);
+}
+
+/**
+ * nitrox_common_sw_init - allocate software resources.
+ * @ndev: NITROX device
+ *
+ * Allocates crypto context pools and command queues etc.
+ *
+ * Return: 0 on success, or a negative error code on error.
+ */
+int nitrox_common_sw_init(struct nitrox_device *ndev)
+{
+	int err = 0;
+
+	/* per device crypto context pool */
+	err = create_crypto_dma_pool(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_init_pkt_cmdqs(ndev);
+	if (err)
+		destroy_crypto_dma_pool(ndev);
+
+	return err;
+}
+
+/**
+ * nitrox_common_sw_cleanup - free software resources.
+ * @ndev: NITROX device
+ */
+void nitrox_common_sw_cleanup(struct nitrox_device *ndev)
+{
+	nitrox_cleanup_pkt_cmdqs(ndev);
+	destroy_crypto_dma_pool(ndev);
+}
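crypto_alloc_context() returns the usable region just past the struct ctx_hdr bookkeeping header, and crypto_free_context() walks back to that header to recover the pool and DMA address. A minimal sketch of the pairing; the caller function is hypothetical:

/* Sketch: allocate a crypto context, use it, then release it. */
static int ctx_roundtrip(struct nitrox_device *ndev)
{
	void *ctx;

	ctx = crypto_alloc_context(ndev);	/* zeroed, DMA-able region */
	if (!ctx)
		return -ENOMEM;

	/* ... program cipher keys/IV into ctx and submit requests ... */

	crypto_free_context(ctx);	/* returns the memory to ndev->ctx_pool */
	return 0;
}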

+ 640 - 0
drivers/crypto/cavium/nitrox/nitrox_main.c

@@ -0,0 +1,640 @@
+#include <linux/aer.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/firmware.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_common.h"
+#include "nitrox_csr.h"
+
+#define CNN55XX_DEV_ID	0x12
+#define MAX_PF_QUEUES	64
+#define UCODE_HLEN 48
+#define SE_GROUP 0
+
+#define DRIVER_VERSION "1.0"
+/* SE microcode */
+#define SE_FW	"cnn55xx_se.fw"
+
+static const char nitrox_driver_name[] = "CNN55XX";
+
+static LIST_HEAD(ndevlist);
+static DEFINE_MUTEX(devlist_lock);
+static unsigned int num_devices;
+
+/**
+ * nitrox_pci_tbl - PCI Device ID Table
+ */
+static const struct pci_device_id nitrox_pci_tbl[] = {
+	{PCI_VDEVICE(CAVIUM, CNN55XX_DEV_ID), 0},
+	/* required last entry */
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, nitrox_pci_tbl);
+
+static unsigned int qlen = DEFAULT_CMD_QLEN;
+module_param(qlen, uint, 0644);
+MODULE_PARM_DESC(qlen, "Command queue length - default 2048");
+
+/**
+ * struct ucode - Firmware Header
+ * @id: microcode ID
+ * @version: firmware version
+ * @code_size: code section size
+ * @raz: alignment
+ * @code: code section
+ */
+struct ucode {
+	u8 id;
+	char version[VERSION_LEN - 1];
+	__be32 code_size;
+	u8 raz[12];
+	u64 code[0];
+};
+
+/**
+ * write_to_ucd_unit - Write firmware to the NITROX UCD unit
+ * @ndev: N5 device
+ * @ucode: microcode image to load
+ */
+static void write_to_ucd_unit(struct nitrox_device *ndev,
+			      struct ucode *ucode)
+{
+	u32 code_size = be32_to_cpu(ucode->code_size) * 2;
+	u64 offset, data;
+	int i = 0;
+
+	/*
+	 * UCD structure
+	 *
+	 *  -------------
+	 *  |    BLK 7  |
+	 *  -------------
+	 *  |    BLK 6  |
+	 *  -------------
+	 *  |    ...    |
+	 *  -------------
+	 *  |    BLK 0  |
+	 *  -------------
+	 *  Total of 8 blocks, each size 32KB
+	 */
+
+	/* set the block number */
+	offset = UCD_UCODE_LOAD_BLOCK_NUM;
+	nitrox_write_csr(ndev, offset, 0);
+
+	code_size = roundup(code_size, 8);
+	while (code_size) {
+		data = ucode->code[i];
+		/* write 8 bytes at a time */
+		offset = UCD_UCODE_LOAD_IDX_DATAX(i);
+		nitrox_write_csr(ndev, offset, data);
+		code_size -= 8;
+		i++;
+	}
+
+	/* put all SE cores in group 0 */
+	offset = POM_GRP_EXECMASKX(SE_GROUP);
+	nitrox_write_csr(ndev, offset, (~0ULL));
+
+	for (i = 0; i < ndev->hw.se_cores; i++) {
+		/*
+		 * write block number and firmware length
+		 * bits <2:0>: block number
+		 * bit 3 set: SE uses 32KB microcode
+		 * bit 3 clear: SE uses 64KB microcode
+		 */
+		offset = UCD_SE_EID_UCODE_BLOCK_NUMX(i);
+		nitrox_write_csr(ndev, offset, 0x8);
+	}
+	usleep_range(300, 400);
+}
+
+static int nitrox_load_fw(struct nitrox_device *ndev, const char *fw_name)
+{
+	const struct firmware *fw;
+	struct ucode *ucode;
+	int ret;
+
+	dev_info(DEV(ndev), "Loading firmware \"%s\"\n", fw_name);
+
+	ret = request_firmware(&fw, fw_name, DEV(ndev));
+	if (ret < 0) {
+		dev_err(DEV(ndev), "failed to get firmware %s\n", fw_name);
+		return ret;
+	}
+
+	ucode = (struct ucode *)fw->data;
+	/* copy the firmware version */
+	memcpy(ndev->hw.fw_name, ucode->version, (VERSION_LEN - 2));
+	ndev->hw.fw_name[VERSION_LEN - 1] = '\0';
+
+	write_to_ucd_unit(ndev, ucode);
+	release_firmware(fw);
+
+	set_bit(NITROX_UCODE_LOADED, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+	return 0;
+}
+
+/**
+ * nitrox_add_to_devlist - add NITROX device to global device list
+ * @ndev: NITROX device
+ */
+static int nitrox_add_to_devlist(struct nitrox_device *ndev)
+{
+	struct nitrox_device *dev;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&ndev->list);
+	refcount_set(&ndev->refcnt, 1);
+
+	mutex_lock(&devlist_lock);
+	list_for_each_entry(dev, &ndevlist, list) {
+		if (dev == ndev) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+	ndev->idx = num_devices++;
+	list_add_tail(&ndev->list, &ndevlist);
+unlock:
+	mutex_unlock(&devlist_lock);
+	return ret;
+}
+
+/**
+ * nitrox_remove_from_devlist - remove NITROX device from
+ *   global device list
+ * @ndev: NITROX device
+ */
+static void nitrox_remove_from_devlist(struct nitrox_device *ndev)
+{
+	mutex_lock(&devlist_lock);
+	list_del(&ndev->list);
+	num_devices--;
+	mutex_unlock(&devlist_lock);
+}
+
+struct nitrox_device *nitrox_get_first_device(void)
+{
+	struct nitrox_device *ndev = NULL;
+
+	mutex_lock(&devlist_lock);
+	list_for_each_entry(ndev, &ndevlist, list) {
+		if (nitrox_ready(ndev))
+			break;
+	}
+	mutex_unlock(&devlist_lock);
+	if (!ndev)
+		return NULL;
+
+	refcount_inc(&ndev->refcnt);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+	return ndev;
+}
+
+void nitrox_put_device(struct nitrox_device *ndev)
+{
+	if (!ndev)
+		return;
+
+	refcount_dec(&ndev->refcnt);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+}
+
+static int nitrox_reset_device(struct pci_dev *pdev)
+{
+	int pos = 0;
+
+	pos = pci_save_state(pdev);
+	if (pos) {
+		dev_err(&pdev->dev, "Failed to save pci state\n");
+		return -ENOMEM;
+	}
+
+	pos = pci_pcie_cap(pdev);
+	if (!pos)
+		return -ENOTTY;
+
+	if (!pci_wait_for_pending_transaction(pdev))
+		dev_err(&pdev->dev, "waiting for pending transaction\n");
+
+	pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
+	msleep(100);
+	pci_restore_state(pdev);
+
+	return 0;
+}
+
+static int nitrox_pf_sw_init(struct nitrox_device *ndev)
+{
+	int err;
+
+	err = nitrox_common_sw_init(ndev);
+	if (err)
+		return err;
+
+	err = nitrox_pf_init_isr(ndev);
+	if (err)
+		nitrox_common_sw_cleanup(ndev);
+
+	return err;
+}
+
+static void nitrox_pf_sw_cleanup(struct nitrox_device *ndev)
+{
+	nitrox_pf_cleanup_isr(ndev);
+	nitrox_common_sw_cleanup(ndev);
+}
+
+/**
+ * nitrox_bist_check - Check NITROX BIST register status
+ * @ndev: NITROX device
+ */
+static int nitrox_bist_check(struct nitrox_device *ndev)
+{
+	u64 value = 0;
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		value += nitrox_read_csr(ndev, EMU_BIST_STATUSX(i));
+		value += nitrox_read_csr(ndev, EFL_CORE_BIST_REGX(i));
+	}
+	value += nitrox_read_csr(ndev, UCD_BIST_STATUS);
+	value += nitrox_read_csr(ndev, NPS_CORE_BIST_REG);
+	value += nitrox_read_csr(ndev, NPS_CORE_NPC_BIST_REG);
+	value += nitrox_read_csr(ndev, NPS_PKT_SLC_BIST_REG);
+	value += nitrox_read_csr(ndev, NPS_PKT_IN_BIST_REG);
+	value += nitrox_read_csr(ndev, POM_BIST_REG);
+	value += nitrox_read_csr(ndev, BMI_BIST_REG);
+	value += nitrox_read_csr(ndev, EFL_TOP_BIST_STAT);
+	value += nitrox_read_csr(ndev, BMO_BIST_REG);
+	value += nitrox_read_csr(ndev, LBC_BIST_STATUS);
+	value += nitrox_read_csr(ndev, PEM_BIST_STATUSX(0));
+	if (value)
+		return -EIO;
+	return 0;
+}
+
+static void nitrox_get_hwinfo(struct nitrox_device *ndev)
+{
+	union emu_fuse_map emu_fuse;
+	u64 offset;
+	int i;
+
+	for (i = 0; i < NR_CLUSTERS; i++) {
+		u8 dead_cores;
+
+		offset = EMU_FUSE_MAPX(i);
+		emu_fuse.value = nitrox_read_csr(ndev, offset);
+		if (emu_fuse.s.valid) {
+			dead_cores = hweight32(emu_fuse.s.ae_fuse);
+			ndev->hw.ae_cores += AE_CORES_PER_CLUSTER - dead_cores;
+			dead_cores = hweight16(emu_fuse.s.se_fuse);
+			ndev->hw.se_cores += SE_CORES_PER_CLUSTER - dead_cores;
+		}
+	}
+}
+
+static int nitrox_pf_hw_init(struct nitrox_device *ndev)
+{
+	int err;
+
+	err = nitrox_bist_check(ndev);
+	if (err) {
+		dev_err(&ndev->pdev->dev, "BIST check failed\n");
+		return err;
+	}
+	/* get cores information */
+	nitrox_get_hwinfo(ndev);
+
+	nitrox_config_nps_unit(ndev);
+	nitrox_config_pom_unit(ndev);
+	nitrox_config_efl_unit(ndev);
+	/* configure IO units */
+	nitrox_config_bmi_unit(ndev);
+	nitrox_config_bmo_unit(ndev);
+	/* configure Local Buffer Cache */
+	nitrox_config_lbc_unit(ndev);
+	nitrox_config_rand_unit(ndev);
+
+	/* load firmware on SE cores */
+	err = nitrox_load_fw(ndev, SE_FW);
+	if (err)
+		return err;
+
+	nitrox_config_emu_unit(ndev);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static int registers_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+	u64 offset;
+
+	/* NPS DMA stats */
+	offset = NPS_STATS_PKT_DMA_RD_CNT;
+	seq_printf(s, "NPS_STATS_PKT_DMA_RD_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+	offset = NPS_STATS_PKT_DMA_WR_CNT;
+	seq_printf(s, "NPS_STATS_PKT_DMA_WR_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+
+	/* BMI/BMO stats */
+	offset = BMI_NPS_PKT_CNT;
+	seq_printf(s, "BMI_NPS_PKT_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+	offset = BMO_NPS_SLC_PKT_CNT;
+	seq_printf(s, "BMO_NPS_SLC_PKT_CNT  0x%016llx\n",
+		   nitrox_read_csr(ndev, offset));
+
+	return 0;
+}
+
+static int registers_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, registers_show, inode->i_private);
+}
+
+static const struct file_operations register_fops = {
+	.owner = THIS_MODULE,
+	.open = registers_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int firmware_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "Version: %s\n", ndev->hw.fw_name);
+	return 0;
+}
+
+static int firmware_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, firmware_show, inode->i_private);
+}
+
+static const struct file_operations firmware_fops = {
+	.owner = THIS_MODULE,
+	.open = firmware_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int nitrox_show(struct seq_file *s, void *v)
+{
+	struct nitrox_device *ndev = s->private;
+
+	seq_printf(s, "NITROX-5 [idx: %d]\n", ndev->idx);
+	seq_printf(s, "  Revision ID: 0x%0x\n", ndev->hw.revision_id);
+	seq_printf(s, "  Cores [AE: %u  SE: %u]\n",
+		   ndev->hw.ae_cores, ndev->hw.se_cores);
+	seq_printf(s, "  Number of Queues: %u\n", ndev->nr_queues);
+	seq_printf(s, "  Queue length: %u\n", ndev->qlen);
+	seq_printf(s, "  Node: %u\n", ndev->node);
+
+	return 0;
+}
+
+static int nitrox_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, nitrox_show, inode->i_private);
+}
+
+static const struct file_operations nitrox_fops = {
+	.owner = THIS_MODULE,
+	.open = nitrox_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void nitrox_debugfs_exit(struct nitrox_device *ndev)
+{
+	debugfs_remove_recursive(ndev->debugfs_dir);
+	ndev->debugfs_dir = NULL;
+}
+
+static int nitrox_debugfs_init(struct nitrox_device *ndev)
+{
+	struct dentry *dir, *f;
+
+	dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!dir)
+		return -ENOMEM;
+
+	ndev->debugfs_dir = dir;
+	f = debugfs_create_file("counters", 0400, dir, ndev, &register_fops);
+	if (!f)
+		goto err;
+	f = debugfs_create_file("firmware", 0400, dir, ndev, &firmware_fops);
+	if (!f)
+		goto err;
+	f = debugfs_create_file("nitrox", 0400, dir, ndev, &nitrox_fops);
+	if (!f)
+		goto err;
+
+	return 0;
+
+err:
+	nitrox_debugfs_exit(ndev);
+	return -ENODEV;
+}
+#else
+static int nitrox_debugfs_init(struct nitrox_device *ndev)
+{
+	return 0;
+}
+
+static void nitrox_debugfs_exit(struct nitrox_device *ndev)
+{
+}
+#endif
+
+/**
+ * nitrox_probe - NITROX Initialization function.
+ * @pdev: PCI device information struct
+ * @id: entry in nitrox_pci_tbl
+ *
+ * Return: 0 if the driver is bound to the device, or
+ *         a negative error code on failure.
+ */
+static int nitrox_probe(struct pci_dev *pdev,
+			const struct pci_device_id *id)
+{
+	struct nitrox_device *ndev;
+	int err;
+
+	dev_info_once(&pdev->dev, "%s driver version %s\n",
+		      nitrox_driver_name, DRIVER_VERSION);
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	/* do FLR */
+	err = nitrox_reset_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "FLR failed\n");
+		pci_disable_device(pdev);
+		return err;
+	}
+
+	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+		dev_dbg(&pdev->dev, "DMA to 64-BIT address\n");
+	} else {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			dev_err(&pdev->dev, "DMA configuration failed\n");
+			pci_disable_device(pdev);
+			return err;
+		}
+	}
+
+	err = pci_request_mem_regions(pdev, nitrox_driver_name);
+	if (err) {
+		pci_disable_device(pdev);
+		return err;
+	}
+	pci_set_master(pdev);
+
+	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
+	if (!ndev)
+		goto ndev_fail;
+
+	pci_set_drvdata(pdev, ndev);
+	ndev->pdev = pdev;
+
+	/* add to device list */
+	nitrox_add_to_devlist(ndev);
+
+	ndev->hw.vendor_id = pdev->vendor;
+	ndev->hw.device_id = pdev->device;
+	ndev->hw.revision_id = pdev->revision;
+	/* command timeout in jiffies */
+	ndev->timeout = msecs_to_jiffies(CMD_TIMEOUT);
+	ndev->node = dev_to_node(&pdev->dev);
+	if (ndev->node == NUMA_NO_NODE)
+		ndev->node = 0;
+
+	ndev->bar_addr = ioremap(pci_resource_start(pdev, 0),
+				 pci_resource_len(pdev, 0));
+	if (!ndev->bar_addr) {
+		err = -EIO;
+		goto ioremap_err;
+	}
+	/* allocate command queues based on online CPUs, maximum of 64 */
+	ndev->nr_queues = min_t(u32, MAX_PF_QUEUES, num_online_cpus());
+	ndev->qlen = qlen;
+
+	err = nitrox_pf_sw_init(ndev);
+	if (err)
+		goto ioremap_err;
+
+	err = nitrox_pf_hw_init(ndev);
+	if (err)
+		goto pf_hw_fail;
+
+	err = nitrox_debugfs_init(ndev);
+	if (err)
+		goto pf_hw_fail;
+
+	set_bit(NITROX_READY, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+
+	err = nitrox_crypto_register();
+	if (err)
+		goto crypto_fail;
+
+	return 0;
+
+crypto_fail:
+	nitrox_debugfs_exit(ndev);
+	clear_bit(NITROX_READY, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+pf_hw_fail:
+	nitrox_pf_sw_cleanup(ndev);
+ioremap_err:
+	nitrox_remove_from_devlist(ndev);
+	kfree(ndev);
+	pci_set_drvdata(pdev, NULL);
+ndev_fail:
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * nitrox_remove - Unbind the driver from the device.
+ * @pdev: PCI device information struct
+ */
+static void nitrox_remove(struct pci_dev *pdev)
+{
+	struct nitrox_device *ndev = pci_get_drvdata(pdev);
+
+	if (!ndev)
+		return;
+
+	if (!refcount_dec_and_test(&ndev->refcnt)) {
+		dev_err(DEV(ndev), "Device refcnt not zero (%d)\n",
+			refcount_read(&ndev->refcnt));
+		return;
+	}
+
+	dev_info(DEV(ndev), "Removing Device %x:%x\n",
+		 ndev->hw.vendor_id, ndev->hw.device_id);
+
+	clear_bit(NITROX_READY, &ndev->status);
+	/* barrier to sync with other cpus */
+	smp_mb__after_atomic();
+
+	nitrox_remove_from_devlist(ndev);
+	nitrox_crypto_unregister();
+	nitrox_debugfs_exit(ndev);
+	nitrox_pf_sw_cleanup(ndev);
+
+	iounmap(ndev->bar_addr);
+	kfree(ndev);
+
+	pci_set_drvdata(pdev, NULL);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static void nitrox_shutdown(struct pci_dev *pdev)
+{
+	pci_set_drvdata(pdev, NULL);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver nitrox_driver = {
+	.name = nitrox_driver_name,
+	.id_table = nitrox_pci_tbl,
+	.probe = nitrox_probe,
+	.remove	= nitrox_remove,
+	.shutdown = nitrox_shutdown,
+};
+
+module_pci_driver(nitrox_driver);
+
+MODULE_AUTHOR("Srikanth Jampala <Jampala.Srikanth@cavium.com>");
+MODULE_DESCRIPTION("Cavium CNN55XX PF Driver " DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_FIRMWARE(SE_FW);
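write_to_ucd_unit() converts the big-endian code_size field into a byte count and then issues one 64-bit nitrox_write_csr() per 8 bytes. The multiplication by two suggests the field counts 16-bit units; that interpretation is an assumption, not something the struct ucode header documents. A worked sketch of the resulting write count, with a hypothetical helper name:

/* Sketch: number of 64-bit UCD data writes for a given firmware image. */
static u32 ucd_write_count(const struct ucode *ucode)
{
	u32 code_size = be32_to_cpu(ucode->code_size) * 2; /* assumed 16-bit units */

	return roundup(code_size, 8) / 8;	/* one CSR write per 8 bytes */
}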

+ 445 - 0
drivers/crypto/cavium/nitrox/nitrox_req.h

@@ -0,0 +1,445 @@
+#ifndef __NITROX_REQ_H
+#define __NITROX_REQ_H
+
+#include <linux/dma-mapping.h>
+#include <crypto/aes.h>
+
+#include "nitrox_dev.h"
+
+/**
+ * struct gphdr - General purpose Header
+ * @param0: first parameter.
+ * @param1: second parameter.
+ * @param2: third parameter.
+ * @param3: fourth parameter.
+ *
+ * Params tell the iv and enc/dec data offsets.
+ */
+struct gphdr {
+	__be16 param0;
+	__be16 param1;
+	__be16 param2;
+	__be16 param3;
+};
+
+/**
+ * struct se_req_ctrl - SE request information.
+ * @arg: Minor number of the opcode
+ * @ctxc: Context control.
+ * @unca: Uncertainty enabled.
+ * @info: Additional information for SE cores.
+ * @unc: Uncertainty count.
+ * @ctxl: Context length in bytes.
+ * @uddl: User-defined data length
+ */
+union se_req_ctrl {
+	u64 value;
+	struct {
+		u64 raz	: 22;
+		u64 arg	: 8;
+		u64 ctxc : 2;
+		u64 unca : 1;
+		u64 info : 3;
+		u64 unc : 8;
+		u64 ctxl : 12;
+		u64 uddl : 8;
+	} s;
+};
+
+struct nitrox_sglist {
+	u16 len;
+	u16 raz0;
+	u32 raz1;
+	dma_addr_t dma;
+};
+
+#define MAX_IV_LEN 16
+
+/**
+ * struct se_crypto_request - SE crypto request structure.
+ * @opcode: Request opcode (enc/dec)
+ * @gfp: gfp flags for allocations
+ * @flags: flags from crypto subsystem
+ * @ctx_handle: Crypto context handle.
+ * @gph: GP Header
+ * @ctrl: Request Information.
+ * @iv: Initialization vector copy
+ * @ivsize: IV length in bytes
+ * @src: Input scatterlist
+ * @dst: Output scatterlist
+ */
+struct se_crypto_request {
+	u8 opcode;
+	gfp_t gfp;
+	u32 flags;
+	u64 ctx_handle;
+
+	struct gphdr gph;
+	union se_req_ctrl ctrl;
+
+	u8 iv[MAX_IV_LEN];
+	u16 ivsize;
+
+	struct scatterlist *src;
+	struct scatterlist *dst;
+};
+
+/* Crypto opcodes */
+#define FLEXI_CRYPTO_ENCRYPT_HMAC	0x33
+#define ENCRYPT	0
+#define DECRYPT 1
+
+/* IV from context */
+#define IV_FROM_CTX	0
+/* IV from Input data */
+#define IV_FROM_DPTR	1
+
+/**
+ * cipher opcodes for firmware
+ */
+enum flexi_cipher {
+	CIPHER_NULL = 0,
+	CIPHER_3DES_CBC,
+	CIPHER_3DES_ECB,
+	CIPHER_AES_CBC,
+	CIPHER_AES_ECB,
+	CIPHER_AES_CFB,
+	CIPHER_AES_CTR,
+	CIPHER_AES_GCM,
+	CIPHER_AES_XTS,
+	CIPHER_AES_CCM,
+	CIPHER_AES_CBC_CTS,
+	CIPHER_AES_ECB_CTS,
+	CIPHER_INVALID
+};
+
+/**
+ * struct crypto_keys - Crypto keys
+ * @key: Encryption key or KEY1 for AES-XTS
+ * @iv: Encryption IV or Tweak for AES-XTS
+ */
+struct crypto_keys {
+	union {
+		u8 key[AES_MAX_KEY_SIZE];
+		u8 key1[AES_MAX_KEY_SIZE];
+	} u;
+	u8 iv[AES_BLOCK_SIZE];
+};
+
+/**
+ * struct auth_keys - Authentication keys
+ * @ipad: IPAD or KEY2 for AES-XTS
+ * @opad: OPAD or AUTH KEY if auth_input_type = 1
+ */
+struct auth_keys {
+	union {
+		u8 ipad[64];
+		u8 key2[64];
+	} u;
+	u8 opad[64];
+};
+
+/**
+ * struct flexi_crypto_context - Crypto context
+ * @cipher_type: Encryption cipher type
+ * @aes_keylen: AES key length
+ * @iv_source: Encryption IV source
+ * @hash_type: Authentication type
+ * @auth_input_type: Authentication input type
+ *   1 - Authentication IV and KEY, microcode calculates OPAD/IPAD
+ *   0 - Authentication OPAD/IPAD
+ * @mac_len: mac length
+ * @crypto: Crypto keys
+ * @auth: Authentication keys
+ */
+struct flexi_crypto_context {
+	union {
+		__be64 flags;
+		struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+			u64 cipher_type	: 4;
+			u64 reserved_59	: 1;
+			u64 aes_keylen : 2;
+			u64 iv_source : 1;
+			u64 hash_type : 4;
+			u64 reserved_49_51 : 3;
+			u64 auth_input_type: 1;
+			u64 mac_len : 8;
+			u64 reserved_0_39 : 40;
+#else
+			u64 reserved_0_39 : 40;
+			u64 mac_len : 8;
+			u64 auth_input_type: 1;
+			u64 reserved_49_51 : 3;
+			u64 hash_type : 4;
+			u64 iv_source : 1;
+			u64 aes_keylen : 2;
+			u64 reserved_59	: 1;
+			u64 cipher_type	: 4;
+#endif
+		} w0;
+	};
+
+	struct crypto_keys crypto;
+	struct auth_keys auth;
+};
+
+struct nitrox_crypto_ctx {
+	struct nitrox_device *ndev;
+	union {
+		u64 ctx_handle;
+		struct flexi_crypto_context *fctx;
+	} u;
+};
+
+struct nitrox_kcrypt_request {
+	struct se_crypto_request creq;
+	struct nitrox_crypto_ctx *nctx;
+	struct skcipher_request *skreq;
+};
+
+/**
+ * struct pkt_instr_hdr - Packet Instruction Header
+ * @g: Gather used
+ *   When [G] is set and [GSZ] != 0, the instruction is
+ *   indirect gather instruction.
+ *   When [G] is set and [GSZ] = 0, the instruction is
+ *   direct gather instruction.
+ * @gsz: Number of pointers in the indirect gather list
+ * @ihi: When set, the hardware duplicates the first 8 bytes of
+ *   pkt_instr_hdr and adds them to the packet after the pkt_instr_hdr
+ *   but before any UDD.
+ * @ssz: Not used by the input hardware, but can become slc_store_int[SSZ]
+ *   when [IHI] is set.
+ * @fsz: The number of front data bytes directly included in the
+ *   PCIe instruction.
+ * @tlen: The length of the input packet in bytes, including:
+ *   - 16B pkt_hdr
+ *   - Inline context bytes if any,
+ *   - UDD if any,
+ *   - packet payload bytes
+ */
+union pkt_instr_hdr {
+	u64 value;
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz_48_63 : 16;
+		u64 g : 1;
+		u64 gsz	: 7;
+		u64 ihi	: 1;
+		u64 ssz	: 7;
+		u64 raz_30_31 : 2;
+		u64 fsz	: 6;
+		u64 raz_16_23 : 8;
+		u64 tlen : 16;
+#else
+		u64 tlen : 16;
+		u64 raz_16_23 : 8;
+		u64 fsz	: 6;
+		u64 raz_30_31 : 2;
+		u64 ssz	: 7;
+		u64 ihi	: 1;
+		u64 gsz	: 7;
+		u64 g : 1;
+		u64 raz_48_63 : 16;
+#endif
+	} s;
+};
+
+/**
+ * struct pkt_hdr - Packet Input Header
+ * @opcode: Request opcode (Major)
+ * @arg: Request opcode (Minor)
+ * @ctxc: Context control.
+ * @unca: When set, [UNC] is the uncertainty count for an input packet.
+ *        The hardware uses uncertainty counts to predict
+ *        output buffer use and avoid deadlock.
+ * @info: Not used by input hardware. Available for use
+ *        during SE processing.
+ * @destport: The expected destination port/ring/channel for the packet.
+ * @unc: Uncertainty count for an input packet.
+ * @grp: SE group that will process the input packet.
+ * @ctxl: Context Length in 64-bit words.
+ * @uddl: User-defined data (UDD) length in bytes.
+ * @ctxp: Context pointer. CTXP<63,2:0> must be zero in all cases.
+ */
+union pkt_hdr {
+	u64 value[2];
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 opcode : 8;
+		u64 arg	: 8;
+		u64 ctxc : 2;
+		u64 unca : 1;
+		u64 raz_44 : 1;
+		u64 info : 3;
+		u64 destport : 9;
+		u64 unc	: 8;
+		u64 raz_19_23 : 5;
+		u64 grp	: 3;
+		u64 raz_15 : 1;
+		u64 ctxl : 7;
+		u64 uddl : 8;
+#else
+		u64 uddl : 8;
+		u64 ctxl : 7;
+		u64 raz_15 : 1;
+		u64 grp	: 3;
+		u64 raz_19_23 : 5;
+		u64 unc	: 8;
+		u64 destport : 9;
+		u64 info : 3;
+		u64 raz_44 : 1;
+		u64 unca : 1;
+		u64 ctxc : 2;
+		u64 arg	: 8;
+		u64 opcode : 8;
+#endif
+		__be64 ctxp;
+	} s;
+};
+
+/**
+ * struct slc_store_info - Solicited Packet Output Store Information.
+ * @ssz: The number of scatterlist pointers for the solicited output port
+ *       packet.
+ * @rptr: The result pointer for the solicited output port packet.
+ *        If [SSZ]=0, [RPTR] must point directly to a buffer on the remote
+ *        host that is large enough to hold the entire output packet.
+ *        If [SSZ]!=0, [RPTR] must point to an array of ([SSZ]+3)/4
+ *        sglist components at [RPTR] on the remote host.
+ */
+union slc_store_info {
+	u64 value[2];
+	struct {
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 raz_39_63 : 25;
+		u64 ssz	: 7;
+		u64 raz_0_31 : 32;
+#else
+		u64 raz_0_31 : 32;
+		u64 ssz	: 7;
+		u64 raz_39_63 : 25;
+#endif
+		__be64 rptr;
+	} s;
+};
+
+/**
+ * struct nps_pkt_instr - NPS Packet Instruction of SE cores.
+ * @dptr0 : Input pointer points to buffer in remote host.
+ * @ih: Packet Instruction Header (8 bytes)
+ * @irh: Packet Input Header (16 bytes)
+ * @slc: Solicited Packet Output Store Information (16 bytes)
+ * @fdata: Front data
+ *
+ * 64-Byte Instruction Format
+ */
+struct nps_pkt_instr {
+	__be64 dptr0;
+	union pkt_instr_hdr ih;
+	union pkt_hdr irh;
+	union slc_store_info slc;
+	u64 fdata[2];
+};
+
+/**
+ * struct ctx_hdr - Bookkeeping data about the crypto context
+ * @pool: Pool used to allocate crypto context
+ * @dma: Base DMA address of the crypto context
+ * @ctx_dma: Actual usable crypto context for NITROX
+ */
+struct ctx_hdr {
+	struct dma_pool *pool;
+	dma_addr_t dma;
+	dma_addr_t ctx_dma;
+};
+
+/*
+ * struct nitrox_sgcomp - SG list component format
+ * @len: Lengths of the (up to four) buffers on the remote host,
+ *       one entry per pointer in @dma.
+ * @dma: Pointers to the (up to four) buffers on the remote host.
+ */
+struct nitrox_sgcomp {
+	__be16 len[4];
+	__be64 dma[4];
+};
+
+/*
+ * struct nitrox_sgtable - SG list information
+ * @map_bufs_cnt: Number of buffers mapped
+ * @nr_sgcomp: Number of sglist components
+ * @total_bytes: Total bytes in sglist.
+ * @len: Total sglist components length.
+ * @dma: DMA address of sglist component.
+ * @dir: DMA direction.
+ * @buf: crypto request buffer.
+ * @sglist: SG list of input/output buffers.
+ * @sgcomp: sglist component for NITROX.
+ */
+struct nitrox_sgtable {
+	u8 map_bufs_cnt;
+	u8 nr_sgcomp;
+	u16 total_bytes;
+	u32 len;
+	dma_addr_t dma;
+	enum dma_data_direction dir;
+
+	struct scatterlist *buf;
+	struct nitrox_sglist *sglist;
+	struct nitrox_sgcomp *sgcomp;
+};
+
+/* Response Header Length */
+#define ORH_HLEN	8
+/* Completion bytes Length */
+#define COMP_HLEN	8
+
+struct resp_hdr {
+	u64 orh;
+	dma_addr_t orh_dma;
+	u64 completion;
+	dma_addr_t completion_dma;
+};
+
+typedef void (*completion_t)(struct skcipher_request *skreq, int err);
+
+/**
+ * struct nitrox_softreq - Represents the NITROX Request.
+ * @response: response list entry
+ * @backlog: Backlog list entry
+ * @ndev: Device used to submit the request
+ * @cmdq: Command queue for submission
+ * @resp: Response headers
+ * @instr: 64B instruction
+ * @in: SG table for input
+ * @out: SG table for output
+ * @tstamp: Request submitted time in jiffies
+ * @callback: callback after request completion/timeout
+ * @skreq: skcipher request passed to the callback
+ */
+struct nitrox_softreq {
+	struct list_head response;
+	struct list_head backlog;
+
+	u32 flags;
+	gfp_t gfp;
+	atomic_t status;
+	bool inplace;
+
+	struct nitrox_device *ndev;
+	struct nitrox_cmdq *cmdq;
+
+	struct nps_pkt_instr instr;
+	struct resp_hdr resp;
+	struct nitrox_sgtable in;
+	struct nitrox_sgtable out;
+
+	unsigned long tstamp;
+
+	completion_t callback;
+	struct skcipher_request *skreq;
+};
+
+#endif /* __NITROX_REQ_H */
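
Taken together, the structures above are meant to pack into the 64-byte instruction the SE cores consume: an 8-byte DPTR0, an 8-byte instruction header, a 16-byte input header, 16 bytes of SLC store info and 16 bytes of front data. A minimal userspace sketch of that arithmetic, assuming only the sizes stated in the struct nps_pkt_instr comment above:

    #include <assert.h>

    int main(void)
    {
            /* sizes taken from the struct nps_pkt_instr documentation above */
            unsigned int dptr0 = 8, ih = 8, irh = 16, slc = 16, fdata = 16;

            assert(dptr0 + ih + irh + slc + fdata == 64);
            return 0;
    }
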

+ 735 - 0
drivers/crypto/cavium/nitrox/nitrox_reqmgr.c

@@ -0,0 +1,735 @@
+#include <linux/gfp.h>
+#include <linux/workqueue.h>
+#include <crypto/internal/skcipher.h>
+
+#include "nitrox_dev.h"
+#include "nitrox_req.h"
+#include "nitrox_csr.h"
+#include "nitrox_req.h"
+
+/* SLC_STORE_INFO */
+#define MIN_UDD_LEN 16
+/* PKT_IN_HDR + SLC_STORE_INFO */
+#define FDATA_SIZE 32
+/* Base destination port for the solicited requests */
+#define SOLICIT_BASE_DPORT 256
+#define PENDING_SIG	0xFFFFFFFFFFFFFFFFUL
+
+#define REQ_NOT_POSTED 1
+#define REQ_BACKLOG    2
+#define REQ_POSTED     3
+
+/**
+ * Response codes from SE microcode
+ * 0x00 - Success
+ *   Completion with no error
+ * 0x43 - ERR_GC_DATA_LEN_INVALID
+ *   Invalid Data length if Encryption Data length is
+ *   less than 16 bytes for AES-XTS and AES-CTS.
+ * 0x45 - ERR_GC_CTX_LEN_INVALID
+ *   Invalid context length: CTXL != 23 words.
+ * 0x4F - ERR_GC_DOCSIS_CIPHER_INVALID
+ *   DOCSIS support is enabled with other than
+ *   AES/DES-CBC mode encryption.
+ * 0x50 - ERR_GC_DOCSIS_OFFSET_INVALID
+ *   Authentication offset is other than 0 with
+ *   Encryption IV source = 0.
+ *   Authentication offset is other than 8 (DES)/16 (AES)
+ *   with Encryption IV source = 1
+ * 0x51 - ERR_GC_CRC32_INVALID_SELECTION
+ *   CRC32 is enabled for other than DOCSIS encryption.
+ * 0x52 - ERR_GC_AES_CCM_FLAG_INVALID
+ *   Invalid flag options in AES-CCM IV.
+ */
+
+/**
+ * softreq_unmap_sgbufs - unmap and free the request's SG lists.
+ * @sr: soft request
+ */
+static void softreq_unmap_sgbufs(struct nitrox_softreq *sr)
+{
+	struct nitrox_device *ndev = sr->ndev;
+	struct device *dev = DEV(ndev);
+	struct nitrox_sglist *sglist;
+
+	/* unmap in sgbuf */
+	sglist = sr->in.sglist;
+	if (!sglist)
+		goto out_unmap;
+
+	/* unmap iv */
+	dma_unmap_single(dev, sglist->dma, sglist->len, DMA_BIDIRECTIONAL);
+	/* unmap src sglist */
+	dma_unmap_sg(dev, sr->in.buf, (sr->in.map_bufs_cnt - 1), sr->in.dir);
+	/* unmap gather component */
+	dma_unmap_single(dev, sr->in.dma, sr->in.len, DMA_TO_DEVICE);
+	kfree(sr->in.sglist);
+	kfree(sr->in.sgcomp);
+	sr->in.sglist = NULL;
+	sr->in.buf = NULL;
+	sr->in.map_bufs_cnt = 0;
+
+out_unmap:
+	/* unmap out sgbuf */
+	sglist = sr->out.sglist;
+	if (!sglist)
+		return;
+
+	/* unmap orh */
+	dma_unmap_single(dev, sr->resp.orh_dma, ORH_HLEN, sr->out.dir);
+
+	/* unmap dst sglist */
+	if (!sr->inplace) {
+		dma_unmap_sg(dev, sr->out.buf, (sr->out.map_bufs_cnt - 3),
+			     sr->out.dir);
+	}
+	/* unmap completion */
+	dma_unmap_single(dev, sr->resp.completion_dma, COMP_HLEN, sr->out.dir);
+
+	/* unmap scatter component */
+	dma_unmap_single(dev, sr->out.dma, sr->out.len, DMA_TO_DEVICE);
+	kfree(sr->out.sglist);
+	kfree(sr->out.sgcomp);
+	sr->out.sglist = NULL;
+	sr->out.buf = NULL;
+	sr->out.map_bufs_cnt = 0;
+}
+
+static void softreq_destroy(struct nitrox_softreq *sr)
+{
+	softreq_unmap_sgbufs(sr);
+	kfree(sr);
+}
+
+/**
+ * create_sg_component - create SG components for the N5 device.
+ * @sr: Request structure
+ * @sgtbl: SG table
+ * @map_nents: number of mapped SG entries to pack into components
+ *
+ * Component structure
+ *
+ *   63     48 47     32 31    16 15      0
+ *   --------------------------------------
+ *   |   LEN0  |  LEN1  |  LEN2  |  LEN3  |
+ *   |-------------------------------------
+ *   |               PTR0                 |
+ *   --------------------------------------
+ *   |               PTR1                 |
+ *   --------------------------------------
+ *   |               PTR2                 |
+ *   --------------------------------------
+ *   |               PTR3                 |
+ *   --------------------------------------
+ *
+ *   Returns 0 on success or a negative errno code on error.
+ */
+static int create_sg_component(struct nitrox_softreq *sr,
+			       struct nitrox_sgtable *sgtbl, int map_nents)
+{
+	struct nitrox_device *ndev = sr->ndev;
+	struct nitrox_sgcomp *sgcomp;
+	struct nitrox_sglist *sglist;
+	dma_addr_t dma;
+	size_t sz_comp;
+	int i, j, nr_sgcomp;
+
+	nr_sgcomp = roundup(map_nents, 4) / 4;
+
+	/* each component holds 4 dma pointers */
+	sz_comp = nr_sgcomp * sizeof(*sgcomp);
+	sgcomp = kzalloc(sz_comp, sr->gfp);
+	if (!sgcomp)
+		return -ENOMEM;
+
+	sgtbl->sgcomp = sgcomp;
+	sgtbl->nr_sgcomp = nr_sgcomp;
+
+	sglist = sgtbl->sglist;
+	/* populate device sg component */
+	for (i = 0; i < nr_sgcomp; i++) {
+		for (j = 0; j < 4; j++) {
+			sgcomp->len[j] = cpu_to_be16(sglist->len);
+			sgcomp->dma[j] = cpu_to_be64(sglist->dma);
+			sglist++;
+		}
+		sgcomp++;
+	}
+	/* map the device sg component */
+	dma = dma_map_single(DEV(ndev), sgtbl->sgcomp, sz_comp, DMA_TO_DEVICE);
+	if (dma_mapping_error(DEV(ndev), dma)) {
+		kfree(sgtbl->sgcomp);
+		sgtbl->sgcomp = NULL;
+		return -ENOMEM;
+	}
+
+	sgtbl->dma = dma;
+	sgtbl->len = sz_comp;
+
+	return 0;
+}
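
As a rough illustration of the rounding above (a hypothetical userspace sketch, not driver code): map_nents gather/scatter entries are packed four per component, and kzalloc() leaves any unused trailing len/dma slots zeroed.

    #include <stdio.h>

    /* same arithmetic as roundup(map_nents, 4) / 4 above */
    static unsigned int nr_sgcomp(unsigned int map_nents)
    {
            return (map_nents + 3) / 4;
    }

    int main(void)
    {
            unsigned int n;

            for (n = 1; n <= 8; n++)
                    printf("map_nents=%u -> %u component(s), %u zeroed slot(s)\n",
                           n, nr_sgcomp(n), nr_sgcomp(n) * 4 - n);
            return 0;
    }
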
+
+/**
+ * dma_map_inbufs - DMA map the input sglist and create the sglist
+ *                  component for the N5 device.
+ * @sr: Request structure
+ * @req: Crypto request structure
+ *
+ * Returns 0 if successful or a negative errno code on error.
+ */
+static int dma_map_inbufs(struct nitrox_softreq *sr,
+			  struct se_crypto_request *req)
+{
+	struct device *dev = DEV(sr->ndev);
+	struct scatterlist *sg = req->src;
+	struct nitrox_sglist *glist;
+	int i, nents, ret = 0;
+	dma_addr_t dma;
+	size_t sz;
+
+	nents = sg_nents(req->src);
+
+	/* create gather list for the IV and src entries */
+	sz = roundup((1 + nents), 4) * sizeof(*glist);
+	glist = kzalloc(sz, sr->gfp);
+	if (!glist)
+		return -ENOMEM;
+
+	sr->in.sglist = glist;
+	/* map IV */
+	dma = dma_map_single(dev, &req->iv, req->ivsize, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, dma)) {
+		ret = -EINVAL;
+		goto iv_map_err;
+	}
+
+	sr->in.dir = (req->src == req->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	/* map src entries */
+	nents = dma_map_sg(dev, req->src, nents, sr->in.dir);
+	if (!nents) {
+		ret = -EINVAL;
+		goto src_map_err;
+	}
+	sr->in.buf = req->src;
+
+	/* store the mappings */
+	glist->len = req->ivsize;
+	glist->dma = dma;
+	glist++;
+	sr->in.total_bytes += req->ivsize;
+
+	for_each_sg(req->src, sg, nents, i) {
+		glist->len = sg_dma_len(sg);
+		glist->dma = sg_dma_address(sg);
+		sr->in.total_bytes += glist->len;
+		glist++;
+	}
+	/* round up map count to align with entries in the sg component */
+	sr->in.map_bufs_cnt = (1 + nents);
+
+	/* create NITROX gather component */
+	ret = create_sg_component(sr, &sr->in, sr->in.map_bufs_cnt);
+	if (ret)
+		goto incomp_err;
+
+	return 0;
+
+incomp_err:
+	dma_unmap_sg(dev, req->src, nents, sr->in.dir);
+	sr->in.map_bufs_cnt = 0;
+src_map_err:
+	dma_unmap_single(dev, dma, req->ivsize, DMA_BIDIRECTIONAL);
+iv_map_err:
+	kfree(sr->in.sglist);
+	sr->in.sglist = NULL;
+	return ret;
+}
+
+static int dma_map_outbufs(struct nitrox_softreq *sr,
+			   struct se_crypto_request *req)
+{
+	struct device *dev = DEV(sr->ndev);
+	struct nitrox_sglist *glist = sr->in.sglist;
+	struct nitrox_sglist *slist;
+	struct scatterlist *sg;
+	int i, nents, map_bufs_cnt, ret = 0;
+	size_t sz;
+
+	nents = sg_nents(req->dst);
+
+	/* create scatter list ORH, IV, dst entries and Completion header */
+	sz = roundup((3 + nents), 4) * sizeof(*slist);
+	slist = kzalloc(sz, sr->gfp);
+	if (!slist)
+		return -ENOMEM;
+
+	sr->out.sglist = slist;
+	sr->out.dir = DMA_BIDIRECTIONAL;
+	/* map ORH */
+	sr->resp.orh_dma = dma_map_single(dev, &sr->resp.orh, ORH_HLEN,
+					  sr->out.dir);
+	if (dma_mapping_error(dev, sr->resp.orh_dma)) {
+		ret = -EINVAL;
+		goto orh_map_err;
+	}
+
+	/* map completion */
+	sr->resp.completion_dma = dma_map_single(dev, &sr->resp.completion,
+						 COMP_HLEN, sr->out.dir);
+	if (dma_mapping_error(dev, sr->resp.completion_dma)) {
+		ret = -EINVAL;
+		goto compl_map_err;
+	}
+
+	sr->inplace = (req->src == req->dst);
+	/* out of place */
+	if (!sr->inplace) {
+		nents = dma_map_sg(dev, req->dst, nents, sr->out.dir);
+		if (!nents) {
+			ret = -EINVAL;
+			goto dst_map_err;
+		}
+	}
+	sr->out.buf = req->dst;
+
+	/* store the mappings */
+	/* orh */
+	slist->len = ORH_HLEN;
+	slist->dma = sr->resp.orh_dma;
+	slist++;
+
+	/* copy the glist mappings */
+	if (sr->inplace) {
+		nents = sr->in.map_bufs_cnt - 1;
+		map_bufs_cnt = sr->in.map_bufs_cnt;
+		while (map_bufs_cnt--) {
+			slist->len = glist->len;
+			slist->dma = glist->dma;
+			slist++;
+			glist++;
+		}
+	} else {
+		/* copy iv mapping */
+		slist->len = glist->len;
+		slist->dma = glist->dma;
+		slist++;
+		/* copy remaining maps */
+		for_each_sg(req->dst, sg, nents, i) {
+			slist->len = sg_dma_len(sg);
+			slist->dma = sg_dma_address(sg);
+			slist++;
+		}
+	}
+
+	/* completion */
+	slist->len = COMP_HLEN;
+	slist->dma = sr->resp.completion_dma;
+
+	sr->out.map_bufs_cnt = (3 + nents);
+
+	ret = create_sg_component(sr, &sr->out, sr->out.map_bufs_cnt);
+	if (ret)
+		goto outcomp_map_err;
+
+	return 0;
+
+outcomp_map_err:
+	if (!sr->inplace)
+		dma_unmap_sg(dev, req->dst, nents, sr->out.dir);
+	sr->out.map_bufs_cnt = 0;
+	sr->out.buf = NULL;
+dst_map_err:
+	dma_unmap_single(dev, sr->resp.completion_dma, COMP_HLEN, sr->out.dir);
+	sr->resp.completion_dma = 0;
+compl_map_err:
+	dma_unmap_single(dev, sr->resp.orh_dma, ORH_HLEN, sr->out.dir);
+	sr->resp.orh_dma = 0;
+orh_map_err:
+	kfree(sr->out.sglist);
+	sr->out.sglist = NULL;
+	return ret;
+}
+
+static inline int softreq_map_iobuf(struct nitrox_softreq *sr,
+				    struct se_crypto_request *creq)
+{
+	int ret;
+
+	ret = dma_map_inbufs(sr, creq);
+	if (ret)
+		return ret;
+
+	ret = dma_map_outbufs(sr, creq);
+	if (ret)
+		softreq_unmap_sgbufs(sr);
+
+	return ret;
+}
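
For orientation, the two mapping helpers above build the following entry counts: the gather side maps the IV plus every source SG entry, the scatter side maps the ORH, the IV/destination entries and the completion dword, and both sglist allocations are rounded up to a multiple of four entries. A small hedged sketch of that bookkeeping (illustrative only, not driver code):

    #include <stdio.h>

    /* mirrors the counts used by dma_map_inbufs()/dma_map_outbufs() above */
    static void print_map_counts(unsigned int src_nents, unsigned int dst_nents)
    {
            unsigned int in_cnt  = 1 + src_nents;   /* IV + src entries */
            unsigned int out_cnt = 3 + dst_nents;   /* ORH + IV/dst + completion */

            printf("in:  %u entries (%u slots allocated)\n",
                   in_cnt, (in_cnt + 3) / 4 * 4);
            printf("out: %u entries (%u slots allocated)\n",
                   out_cnt, (out_cnt + 3) / 4 * 4);
    }

    int main(void)
    {
            print_map_counts(2, 2);
            return 0;
    }
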
+
+static inline void backlog_list_add(struct nitrox_softreq *sr,
+				    struct nitrox_cmdq *cmdq)
+{
+	INIT_LIST_HEAD(&sr->backlog);
+
+	spin_lock_bh(&cmdq->backlog_lock);
+	list_add_tail(&sr->backlog, &cmdq->backlog_head);
+	atomic_inc(&cmdq->backlog_count);
+	atomic_set(&sr->status, REQ_BACKLOG);
+	spin_unlock_bh(&cmdq->backlog_lock);
+}
+
+static inline void response_list_add(struct nitrox_softreq *sr,
+				     struct nitrox_cmdq *cmdq)
+{
+	INIT_LIST_HEAD(&sr->response);
+
+	spin_lock_bh(&cmdq->response_lock);
+	list_add_tail(&sr->response, &cmdq->response_head);
+	spin_unlock_bh(&cmdq->response_lock);
+}
+
+static inline void response_list_del(struct nitrox_softreq *sr,
+				     struct nitrox_cmdq *cmdq)
+{
+	spin_lock_bh(&cmdq->response_lock);
+	list_del(&sr->response);
+	spin_unlock_bh(&cmdq->response_lock);
+}
+
+static struct nitrox_softreq *
+get_first_response_entry(struct nitrox_cmdq *cmdq)
+{
+	return list_first_entry_or_null(&cmdq->response_head,
+					struct nitrox_softreq, response);
+}
+
+static inline bool cmdq_full(struct nitrox_cmdq *cmdq, int qlen)
+{
+	if (atomic_inc_return(&cmdq->pending_count) > qlen) {
+		atomic_dec(&cmdq->pending_count);
+		/* sync with other cpus */
+		smp_mb__after_atomic();
+		return true;
+	}
+	return false;
+}
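
cmdq_full() reserves a ring slot optimistically and rolls the count back when the queue is already full. The same pattern in plain C11 atomics, as a hedged stand-in for the kernel's atomic_t helpers (not driver code):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Try to reserve one slot; undo the increment if the queue is full. */
    static bool slot_reserve(atomic_int *pending, int qlen)
    {
            if (atomic_fetch_add(pending, 1) + 1 > qlen) {
                    atomic_fetch_sub(pending, 1);
                    return false;           /* queue full */
            }
            return true;                    /* slot reserved */
    }

    int main(void)
    {
            atomic_int pending = 0;

            return slot_reserve(&pending, 4) ? 0 : 1;
    }
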
+
+/**
+ * post_se_instr - Post an SE instruction to the Packet Input ring
+ * @sr: Request structure
+ * @cmdq: Command queue to post the instruction on
+ *
+ * Copies the instruction into the ring, adds the request to the response
+ * list and rings the doorbell. The caller must already have reserved a
+ * slot via cmdq_full().
+ */
+static void post_se_instr(struct nitrox_softreq *sr,
+			  struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = sr->ndev;
+	union nps_pkt_in_instr_baoff_dbell pkt_in_baoff_dbell;
+	u64 offset;
+	u8 *ent;
+
+	spin_lock_bh(&cmdq->cmdq_lock);
+
+	/* get the next write offset */
+	offset = NPS_PKT_IN_INSTR_BAOFF_DBELLX(cmdq->qno);
+	pkt_in_baoff_dbell.value = nitrox_read_csr(ndev, offset);
+	/* copy the instruction */
+	ent = cmdq->head + pkt_in_baoff_dbell.s.aoff;
+	memcpy(ent, &sr->instr, cmdq->instr_size);
+	/* flush the command queue updates */
+	dma_wmb();
+
+	sr->tstamp = jiffies;
+	atomic_set(&sr->status, REQ_POSTED);
+	response_list_add(sr, cmdq);
+
+	/* Ring doorbell with count 1 */
+	writeq(1, cmdq->dbell_csr_addr);
+	/* orders the doorbell rings */
+	mmiowb();
+
+	spin_unlock_bh(&cmdq->cmdq_lock);
+}
+
+static int post_backlog_cmds(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+	struct nitrox_softreq *sr, *tmp;
+	int ret = 0;
+
+	spin_lock_bh(&cmdq->backlog_lock);
+
+	list_for_each_entry_safe(sr, tmp, &cmdq->backlog_head, backlog) {
+		struct skcipher_request *skreq;
+
+		/* submit until space available */
+		if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
+			ret = -EBUSY;
+			break;
+		}
+		/* delete from backlog list */
+		list_del(&sr->backlog);
+		atomic_dec(&cmdq->backlog_count);
+		/* sync with other cpus */
+		smp_mb__after_atomic();
+
+		skreq = sr->skreq;
+		/* post the command */
+		post_se_instr(sr, cmdq);
+
+		/* backlog requests are posted, wakeup with -EINPROGRESS */
+		skcipher_request_complete(skreq, -EINPROGRESS);
+	}
+	spin_unlock_bh(&cmdq->backlog_lock);
+
+	return ret;
+}
+
+static int nitrox_enqueue_request(struct nitrox_softreq *sr)
+{
+	struct nitrox_cmdq *cmdq = sr->cmdq;
+	struct nitrox_device *ndev = sr->ndev;
+	int ret = -EBUSY;
+
+	if (unlikely(cmdq_full(cmdq, ndev->qlen))) {
+		if (!(sr->flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EAGAIN;
+
+		backlog_list_add(sr, cmdq);
+	} else {
+		ret = post_backlog_cmds(cmdq);
+		if (ret) {
+			backlog_list_add(sr, cmdq);
+			return ret;
+		}
+		post_se_instr(sr, cmdq);
+		ret = -EINPROGRESS;
+	}
+	return ret;
+}
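
The return convention here follows the usual crypto API backlog semantics: -EINPROGRESS means the command was posted, -EBUSY means it was queued on the backlog (only possible when the caller set CRYPTO_TFM_REQ_MAY_BACKLOG), and -EAGAIN means the ring was full and the request was dropped. A hedged sketch of how a caller might fold that into its own return value (hypothetical helper, not part of this patch):

    #include <errno.h>

    /* Hypothetical caller-side handling of nitrox_enqueue_request()'s result. */
    static int enqueue_status_to_errno(int ret)
    {
            switch (ret) {
            case -EINPROGRESS:      /* posted; completion callback will follow */
            case -EBUSY:            /* backlogged; reposted by the flush work */
                    return 0;
            case -EAGAIN:           /* dropped; caller should retry later */
            default:
                    return ret;
            }
    }

    int main(void)
    {
            return enqueue_status_to_errno(-EINPROGRESS);
    }
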
+
+/**
+ * nitrox_process_se_request - Send a request to an SE core
+ * @ndev: NITROX device
+ * @req: Crypto request
+ * @callback: Completion callback
+ * @skreq: skcipher request passed back to the callback
+ *
+ * Returns -EINPROGRESS when the request is posted, -EBUSY when it is
+ * backlogged, or a negative error code on failure.
+ */
+int nitrox_process_se_request(struct nitrox_device *ndev,
+			      struct se_crypto_request *req,
+			      completion_t callback,
+			      struct skcipher_request *skreq)
+{
+	struct nitrox_softreq *sr;
+	dma_addr_t ctx_handle = 0;
+	int qno, ret = 0;
+
+	if (!nitrox_ready(ndev))
+		return -ENODEV;
+
+	sr = kzalloc(sizeof(*sr), req->gfp);
+	if (!sr)
+		return -ENOMEM;
+
+	sr->ndev = ndev;
+	sr->flags = req->flags;
+	sr->gfp = req->gfp;
+	sr->callback = callback;
+	sr->skreq = skreq;
+
+	atomic_set(&sr->status, REQ_NOT_POSTED);
+
+	WRITE_ONCE(sr->resp.orh, PENDING_SIG);
+	WRITE_ONCE(sr->resp.completion, PENDING_SIG);
+
+	ret = softreq_map_iobuf(sr, req);
+	if (ret) {
+		kfree(sr);
+		return ret;
+	}
+
+	/* get the context handle */
+	if (req->ctx_handle) {
+		struct ctx_hdr *hdr;
+		u8 *ctx_ptr;
+
+		ctx_ptr = (u8 *)(uintptr_t)req->ctx_handle;
+		hdr = (struct ctx_hdr *)(ctx_ptr - sizeof(struct ctx_hdr));
+		ctx_handle = hdr->ctx_dma;
+	}
+
+	/* select the queue */
+	qno = smp_processor_id() % ndev->nr_queues;
+
+	sr->cmdq = &ndev->pkt_cmdqs[qno];
+
+	/*
+	 * 64-Byte Instruction Format
+	 *
+	 *  ----------------------
+	 *  |      DPTR0         | 8 bytes
+	 *  ----------------------
+	 *  |  PKT_IN_INSTR_HDR  | 8 bytes
+	 *  ----------------------
+	 *  |    PKT_IN_HDR      | 16 bytes
+	 *  ----------------------
+	 *  |    SLC_INFO        | 16 bytes
+	 *  ----------------------
+	 *  |   Front data       | 16 bytes
+	 *  ----------------------
+	 */
+
+	/* fill the packet instruction */
+	/* word 0 */
+	sr->instr.dptr0 = cpu_to_be64(sr->in.dma);
+
+	/* word 1 */
+	sr->instr.ih.value = 0;
+	sr->instr.ih.s.g = 1;
+	sr->instr.ih.s.gsz = sr->in.map_bufs_cnt;
+	sr->instr.ih.s.ssz = sr->out.map_bufs_cnt;
+	sr->instr.ih.s.fsz = FDATA_SIZE + sizeof(struct gphdr);
+	sr->instr.ih.s.tlen = sr->instr.ih.s.fsz + sr->in.total_bytes;
+	sr->instr.ih.value = cpu_to_be64(sr->instr.ih.value);
+
+	/* word 2 */
+	sr->instr.irh.value[0] = 0;
+	sr->instr.irh.s.uddl = MIN_UDD_LEN;
+	/* context length in 64-bit words */
+	sr->instr.irh.s.ctxl = (req->ctrl.s.ctxl / 8);
+	/* offset from solicit base port 256 */
+	sr->instr.irh.s.destport = SOLICIT_BASE_DPORT + qno;
+	sr->instr.irh.s.ctxc = req->ctrl.s.ctxc;
+	sr->instr.irh.s.arg = req->ctrl.s.arg;
+	sr->instr.irh.s.opcode = req->opcode;
+	sr->instr.irh.value[0] = cpu_to_be64(sr->instr.irh.value[0]);
+
+	/* word 3 */
+	sr->instr.irh.s.ctxp = cpu_to_be64(ctx_handle);
+
+	/* word 4 */
+	sr->instr.slc.value[0] = 0;
+	sr->instr.slc.s.ssz = sr->out.map_bufs_cnt;
+	sr->instr.slc.value[0] = cpu_to_be64(sr->instr.slc.value[0]);
+
+	/* word 5 */
+	sr->instr.slc.s.rptr = cpu_to_be64(sr->out.dma);
+
+	/*
+	 * Front data needs no endian conversion; it goes into the
+	 * payload as-is. Put the GP header into the front data.
+	 */
+	sr->instr.fdata[0] = *((u64 *)&req->gph);
+	sr->instr.fdata[1] = 0;
+	/* flush the soft_req changes before posting the cmd */
+	wmb();
+
+	ret = nitrox_enqueue_request(sr);
+	if (ret == -EAGAIN)
+		goto send_fail;
+
+	return ret;
+
+send_fail:
+	softreq_destroy(sr);
+	return ret;
+}
+
+static inline int cmd_timeout(unsigned long tstamp, unsigned long timeout)
+{
+	return time_after_eq(jiffies, (tstamp + timeout));
+}
+
+void backlog_qflush_work(struct work_struct *work)
+{
+	struct nitrox_cmdq *cmdq;
+
+	cmdq = container_of(work, struct nitrox_cmdq, backlog_qflush);
+	post_backlog_cmds(cmdq);
+}
+
+/**
+ * process_response_list - process completed requests
+ * @cmdq: Command queue to check for completed requests
+ */
+static void process_response_list(struct nitrox_cmdq *cmdq)
+{
+	struct nitrox_device *ndev = cmdq->ndev;
+	struct nitrox_softreq *sr;
+	struct skcipher_request *skreq;
+	completion_t callback;
+	int req_completed = 0, err = 0, budget;
+
+	/* check all pending requests */
+	budget = atomic_read(&cmdq->pending_count);
+
+	while (req_completed < budget) {
+		sr = get_first_response_entry(cmdq);
+		if (!sr)
+			break;
+
+		if (atomic_read(&sr->status) != REQ_POSTED)
+			break;
+
+		/* check orh and completion bytes updates */
+		if (READ_ONCE(sr->resp.orh) == READ_ONCE(sr->resp.completion)) {
+			/* request not completed, check for timeout */
+			if (!cmd_timeout(sr->tstamp, ndev->timeout))
+				break;
+			dev_err_ratelimited(DEV(ndev),
+					    "Request timeout, orh 0x%016llx\n",
+					    READ_ONCE(sr->resp.orh));
+		}
+		atomic_dec(&cmdq->pending_count);
+		/* sync with other cpus */
+		smp_mb__after_atomic();
+		/* remove from response list */
+		response_list_del(sr, cmdq);
+
+		callback = sr->callback;
+		skreq = sr->skreq;
+
+		/* ORH error code */
+		err = READ_ONCE(sr->resp.orh) & 0xff;
+		softreq_destroy(sr);
+
+		if (callback)
+			callback(skreq, err);
+
+		req_completed++;
+	}
+}
+
+/**
+ * pkt_slc_resp_handler - post processing of SE responses
+ * @data: bottom half argument (pointer to the ring's struct bh_data)
+ */
+void pkt_slc_resp_handler(unsigned long data)
+{
+	struct bh_data *bh = (void *)(uintptr_t)(data);
+	struct nitrox_cmdq *cmdq = bh->cmdq;
+	union nps_pkt_slc_cnts pkt_slc_cnts;
+
+	/* read completion count */
+	pkt_slc_cnts.value = readq(bh->completion_cnt_csr_addr);
+	/* resend the interrupt if more work to do */
+	pkt_slc_cnts.s.resend = 1;
+
+	process_response_list(cmdq);
+
+	/*
+	 * clear the interrupt with resend bit enabled,
+	 * MSI-X interrupt generates if Completion count > Threshold
+	 */
+	writeq(pkt_slc_cnts.value, bh->completion_cnt_csr_addr);
+	/* order the writes */
+	mmiowb();
+
+	if (atomic_read(&cmdq->backlog_count))
+		schedule_work(&cmdq->backlog_qflush);
+}
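
One detail worth keeping in mind when reading process_response_list() above: both the ORH and the completion dword are initialised to PENDING_SIG before the command is posted, so a request counts as finished once the hardware has overwritten at least one of them, and the low byte of the ORH then carries the SE error code. A minimal userspace restatement of that check (assumptions: plain 64-bit loads, no barriers shown):

    #include <stdint.h>
    #include <stdbool.h>

    #define PENDING_SIG	0xFFFFFFFFFFFFFFFFUL

    /* True once the SE core has written back either response word. */
    static bool response_ready(uint64_t orh, uint64_t completion)
    {
            return orh != completion;       /* both start out as PENDING_SIG */
    }

    /* Low ORH byte is the SE error code; 0x00 means success. */
    static int response_error(uint64_t orh)
    {
            return (int)(orh & 0xff);
    }

    int main(void)
    {
            return response_ready(0, PENDING_SIG) ? response_error(0) : 1;
    }
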

+ 2 - 1
drivers/crypto/ccp/Makefile

@@ -4,7 +4,8 @@ ccp-objs := ccp-dev.o \
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
 	    ccp-platform.o \
-	    ccp-dmaengine.o
+	    ccp-dmaengine.o \
+	    ccp-debugfs.o
 ccp-$(CONFIG_PCI) += ccp-pci.o
 
 obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o

+ 3 - 2
drivers/crypto/ccp/ccp-crypto-sha.c

@@ -18,6 +18,7 @@
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/scatterwalk.h>
@@ -308,8 +309,8 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 	}
 
 	for (i = 0; i < block_size; i++) {
-		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
-		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c;
+		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ HMAC_IPAD_VALUE;
+		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ HMAC_OPAD_VALUE;
 	}
 
 	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size);
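
For context, HMAC_IPAD_VALUE and HMAC_OPAD_VALUE are simply the RFC 2104 constants 0x36 and 0x5c that the old code spelled out by hand, so the computed pads do not change. A small standalone sketch of the pad derivation (key assumed already hashed or zero-padded to the block size):

    #include <stddef.h>
    #include <stdint.h>

    #define HMAC_IPAD_VALUE	0x36
    #define HMAC_OPAD_VALUE	0x5c

    /* Derive the HMAC inner and outer pads from a block-sized key. */
    static void hmac_derive_pads(const uint8_t *key, size_t block_size,
                                 uint8_t *ipad, uint8_t *opad)
    {
            size_t i;

            for (i = 0; i < block_size; i++) {
                    ipad[i] = key[i] ^ HMAC_IPAD_VALUE;
                    opad[i] = key[i] ^ HMAC_OPAD_VALUE;
            }
    }

    int main(void)
    {
            uint8_t key[64] = { 0 }, ipad[64], opad[64];

            hmac_derive_pads(key, sizeof(key), ipad, opad);
            return ipad[0] == HMAC_IPAD_VALUE ? 0 : 1;
    }
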

+ 344 - 0
drivers/crypto/ccp/ccp-debugfs.c

@@ -0,0 +1,344 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/* DebugFS helpers */
+#define	OBUFP		(obuf + oboff)
+#define	OBUFLEN		512
+#define	OBUFSPC		(OBUFLEN - oboff)
+#define	OSCNPRINTF(fmt, ...) \
+		scnprintf(OBUFP, OBUFSPC, fmt, ## __VA_ARGS__)
+
+#define BUFLEN	63
+
+#define	RI_VERSION_NUM	0x0000003F
+#define	RI_AES_PRESENT	0x00000040
+#define	RI_3DES_PRESENT	0x00000080
+#define	RI_SHA_PRESENT	0x00000100
+#define	RI_RSA_PRESENT	0x00000200
+#define	RI_ECC_PRESENT	0x00000400
+#define	RI_ZDE_PRESENT	0x00000800
+#define	RI_ZCE_PRESENT	0x00001000
+#define	RI_TRNG_PRESENT	0x00002000
+#define	RI_ELFC_PRESENT	0x00004000
+#define	RI_ELFC_SHIFT	14
+#define	RI_NUM_VQM	0x00078000
+#define	RI_NVQM_SHIFT	15
+#define	RI_NVQM(r)	(((r) * RI_NUM_VQM) >> RI_NVQM_SHIFT)
+#define	RI_LSB_ENTRIES	0x0FF80000
+#define	RI_NLSB_SHIFT	19
+#define	RI_NLSB(r)	(((r) * RI_LSB_ENTRIES) >> RI_NLSB_SHIFT)
+
+static ssize_t ccp5_debugfs_info_read(struct file *filp, char __user *ubuf,
+				      size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	unsigned int oboff = 0;
+	unsigned int regval;
+	ssize_t ret;
+	char *obuf;
+
+	if (!ccp)
+		return 0;
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("Device name: %s\n", ccp->name);
+	oboff += OSCNPRINTF("   RNG name: %s\n", ccp->rngname);
+	oboff += OSCNPRINTF("   # Queues: %d\n", ccp->cmd_q_count);
+	oboff += OSCNPRINTF("     # Cmds: %d\n", ccp->cmd_count);
+
+	regval = ioread32(ccp->io_regs + CMD5_PSP_CCP_VERSION);
+	oboff += OSCNPRINTF("    Version: %d\n", regval & RI_VERSION_NUM);
+	oboff += OSCNPRINTF("    Engines:");
+	if (regval & RI_AES_PRESENT)
+		oboff += OSCNPRINTF(" AES");
+	if (regval & RI_3DES_PRESENT)
+		oboff += OSCNPRINTF(" 3DES");
+	if (regval & RI_SHA_PRESENT)
+		oboff += OSCNPRINTF(" SHA");
+	if (regval & RI_RSA_PRESENT)
+		oboff += OSCNPRINTF(" RSA");
+	if (regval & RI_ECC_PRESENT)
+		oboff += OSCNPRINTF(" ECC");
+	if (regval & RI_ZDE_PRESENT)
+		oboff += OSCNPRINTF(" ZDE");
+	if (regval & RI_ZCE_PRESENT)
+		oboff += OSCNPRINTF(" ZCE");
+	if (regval & RI_TRNG_PRESENT)
+		oboff += OSCNPRINTF(" TRNG");
+	oboff += OSCNPRINTF("\n");
+	oboff += OSCNPRINTF("     Queues: %d\n",
+		   (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
+	oboff += OSCNPRINTF("LSB Entries: %d\n",
+		   (regval & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Return a formatted buffer containing the current
+ * statistics across all queues for a CCP.
+ */
+static ssize_t ccp5_debugfs_stats_read(struct file *filp, char __user *ubuf,
+				       size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	unsigned long total_xts_aes_ops = 0;
+	unsigned long total_3des_ops = 0;
+	unsigned long total_aes_ops = 0;
+	unsigned long total_sha_ops = 0;
+	unsigned long total_rsa_ops = 0;
+	unsigned long total_ecc_ops = 0;
+	unsigned long total_pt_ops = 0;
+	unsigned long total_ops = 0;
+	unsigned int oboff = 0;
+	ssize_t ret = 0;
+	unsigned int i;
+	char *obuf;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+		total_ops += cmd_q->total_ops;
+		total_aes_ops += cmd_q->total_aes_ops;
+		total_xts_aes_ops += cmd_q->total_xts_aes_ops;
+		total_3des_ops += cmd_q->total_3des_ops;
+		total_sha_ops += cmd_q->total_sha_ops;
+		total_rsa_ops += cmd_q->total_rsa_ops;
+		total_pt_ops += cmd_q->total_pt_ops;
+		total_ecc_ops += cmd_q->total_ecc_ops;
+	}
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("Total Interrupts Handled: %ld\n",
+			    ccp->total_interrupts);
+	oboff += OSCNPRINTF("        Total Operations: %ld\n",
+			    total_ops);
+	oboff += OSCNPRINTF("                     AES: %ld\n",
+			    total_aes_ops);
+	oboff += OSCNPRINTF("                 XTS AES: %ld\n",
+			    total_xts_aes_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    total_3des_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    total_sha_ops);
+	oboff += OSCNPRINTF("                     RSA: %ld\n",
+			    total_rsa_ops);
+	oboff += OSCNPRINTF("               Pass-Thru: %ld\n",
+			    total_pt_ops);
+	oboff += OSCNPRINTF("                     ECC: %ld\n",
+			    total_ecc_ops);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Reset the counters in a queue
+ */
+static void ccp5_debugfs_reset_queue_stats(struct ccp_cmd_queue *cmd_q)
+{
+	cmd_q->total_ops = 0L;
+	cmd_q->total_aes_ops = 0L;
+	cmd_q->total_xts_aes_ops = 0L;
+	cmd_q->total_3des_ops = 0L;
+	cmd_q->total_sha_ops = 0L;
+	cmd_q->total_rsa_ops = 0L;
+	cmd_q->total_pt_ops = 0L;
+	cmd_q->total_ecc_ops = 0L;
+}
+
+/* Any write to the stats file resets the counters of every
+ * queue on the device.
+ */
+static ssize_t ccp5_debugfs_stats_write(struct file *filp,
+					const char __user *ubuf,
+					size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	int i;
+
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		ccp5_debugfs_reset_queue_stats(&ccp->cmd_q[i]);
+	ccp->total_interrupts = 0L;
+
+	return count;
+}
+
+/* Return a formatted buffer containing the current information
+ * for that queue
+ */
+static ssize_t ccp5_debugfs_queue_read(struct file *filp, char __user *ubuf,
+				       size_t count, loff_t *offp)
+{
+	struct ccp_cmd_queue *cmd_q = filp->private_data;
+	unsigned int oboff = 0;
+	unsigned int regval;
+	ssize_t ret;
+	char *obuf;
+
+	if (!cmd_q)
+		return 0;
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("  Total Queue Operations: %ld\n",
+			    cmd_q->total_ops);
+	oboff += OSCNPRINTF("                     AES: %ld\n",
+			    cmd_q->total_aes_ops);
+	oboff += OSCNPRINTF("                 XTS AES: %ld\n",
+			    cmd_q->total_xts_aes_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    cmd_q->total_3des_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    cmd_q->total_sha_ops);
+	oboff += OSCNPRINTF("                     RSA: %ld\n",
+			    cmd_q->total_rsa_ops);
+	oboff += OSCNPRINTF("               Pass-Thru: %ld\n",
+			    cmd_q->total_pt_ops);
+	oboff += OSCNPRINTF("                     ECC: %ld\n",
+			    cmd_q->total_ecc_ops);
+
+	regval = ioread32(cmd_q->reg_int_enable);
+	oboff += OSCNPRINTF("      Enabled Interrupts:");
+	if (regval & INT_EMPTY_QUEUE)
+		oboff += OSCNPRINTF(" EMPTY");
+	if (regval & INT_QUEUE_STOPPED)
+		oboff += OSCNPRINTF(" STOPPED");
+	if (regval & INT_ERROR)
+		oboff += OSCNPRINTF(" ERROR");
+	if (regval & INT_COMPLETION)
+		oboff += OSCNPRINTF(" COMPLETION");
+	oboff += OSCNPRINTF("\n");
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Any write to a queue's stats file resets that queue's
+ * counters to zero.
+ */
+static ssize_t ccp5_debugfs_queue_write(struct file *filp,
+					const char __user *ubuf,
+					size_t count, loff_t *offp)
+{
+	struct ccp_cmd_queue *cmd_q = filp->private_data;
+
+	ccp5_debugfs_reset_queue_stats(cmd_q);
+
+	return count;
+}
+
+static const struct file_operations ccp_debugfs_info_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ccp5_debugfs_info_read,
+	.write = NULL,
+};
+
+static const struct file_operations ccp_debugfs_queue_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ccp5_debugfs_queue_read,
+	.write = ccp5_debugfs_queue_write,
+};
+
+static const struct file_operations ccp_debugfs_stats_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ccp5_debugfs_stats_read,
+	.write = ccp5_debugfs_stats_write,
+};
+
+static struct dentry *ccp_debugfs_dir;
+static DEFINE_RWLOCK(ccp_debugfs_lock);
+
+#define	MAX_NAME_LEN	20
+
+void ccp5_debugfs_setup(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *cmd_q;
+	char name[MAX_NAME_LEN + 1];
+	struct dentry *debugfs_info;
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_q_instance;
+	struct dentry *debugfs_q_stats;
+	unsigned long flags;
+	int i;
+
+	if (!debugfs_initialized())
+		return;
+
+	write_lock_irqsave(&ccp_debugfs_lock, flags);
+	if (!ccp_debugfs_dir)
+		ccp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	write_unlock_irqrestore(&ccp_debugfs_lock, flags);
+	if (!ccp_debugfs_dir)
+		return;
+
+	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
+	if (!ccp->debugfs_instance)
+		return;
+
+	debugfs_info = debugfs_create_file("info", 0400,
+					   ccp->debugfs_instance, ccp,
+					   &ccp_debugfs_info_ops);
+	if (!debugfs_info)
+		return;
+
+	debugfs_stats = debugfs_create_file("stats", 0600,
+					    ccp->debugfs_instance, ccp,
+					    &ccp_debugfs_stats_ops);
+	if (!debugfs_stats)
+		return;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		snprintf(name, MAX_NAME_LEN - 1, "q%d", cmd_q->id);
+
+		debugfs_q_instance =
+			debugfs_create_dir(name, ccp->debugfs_instance);
+		if (!debugfs_q_instance)
+			return;
+
+		debugfs_q_stats =
+			debugfs_create_file("stats", 0600,
+					    debugfs_q_instance, cmd_q,
+					    &ccp_debugfs_queue_ops);
+		if (!debugfs_q_stats)
+			return;
+	}
+}
+
+void ccp5_debugfs_destroy(void)
+{
+	debugfs_remove_recursive(ccp_debugfs_dir);
+}
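
As a rough usage illustration, the RI_* masks defined above decode the CMD5_PSP_CCP_VERSION register roughly like this (the sample register value below is made up):

    #include <stdio.h>

    #define RI_VERSION_NUM	0x0000003F
    #define RI_AES_PRESENT	0x00000040
    #define RI_SHA_PRESENT	0x00000100
    #define RI_NUM_VQM	0x00078000
    #define RI_NVQM_SHIFT	15
    #define RI_LSB_ENTRIES	0x0FF80000
    #define RI_NLSB_SHIFT	19

    int main(void)
    {
            unsigned int regval = 0x04A28145;   /* made-up sample value */

            printf("Version:     %u\n", regval & RI_VERSION_NUM);
            printf("AES:         %s\n", (regval & RI_AES_PRESENT) ? "yes" : "no");
            printf("SHA:         %s\n", (regval & RI_SHA_PRESENT) ? "yes" : "no");
            printf("Queues:      %u\n", (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
            printf("LSB entries: %u\n", (regval & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT);
            return 0;
    }
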

+ 27 - 1
drivers/crypto/ccp/ccp-dev-v5.c

@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/kthread.h>
+#include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
@@ -231,6 +232,8 @@ static int ccp5_do_cmd(struct ccp5_desc *desc,
 	int	i;
 	int ret = 0;
 
+	cmd_q->total_ops++;
+
 	if (CCP5_CMD_SOC(desc)) {
 		CCP5_CMD_IOC(desc) = 1;
 		CCP5_CMD_SOC(desc) = 0;
@@ -282,6 +285,8 @@ static int ccp5_perform_aes(struct ccp_op *op)
 	union ccp_function function;
 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
 
+	op->cmd_q->total_aes_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -325,6 +330,8 @@ static int ccp5_perform_xts_aes(struct ccp_op *op)
 	union ccp_function function;
 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
 
+	op->cmd_q->total_xts_aes_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -364,6 +371,8 @@ static int ccp5_perform_sha(struct ccp_op *op)
 	struct ccp5_desc desc;
 	union ccp_function function;
 
+	op->cmd_q->total_sha_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -404,6 +413,8 @@ static int ccp5_perform_des3(struct ccp_op *op)
 	union ccp_function function;
 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
 
+	op->cmd_q->total_3des_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, sizeof(struct ccp5_desc));
 
@@ -444,6 +455,8 @@ static int ccp5_perform_rsa(struct ccp_op *op)
 	struct ccp5_desc desc;
 	union ccp_function function;
 
+	op->cmd_q->total_rsa_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -487,6 +500,8 @@ static int ccp5_perform_passthru(struct ccp_op *op)
 	struct ccp_dma_info *daddr = &op->dst.u.dma;
 
 
+	op->cmd_q->total_pt_ops++;
+
 	memset(&desc, 0, Q_DESC_SIZE);
 
 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
@@ -543,6 +558,8 @@ static int ccp5_perform_ecc(struct ccp_op *op)
 	struct ccp5_desc desc;
 	union ccp_function function;
 
+	op->cmd_q->total_ecc_ops++;
+
 	/* Zero out all the fields of the command desc */
 	memset(&desc, 0, Q_DESC_SIZE);
 
@@ -592,7 +609,6 @@ static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
 	return queues ? 0 : -EINVAL;
 }
 
-
 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
 					int lsb_cnt, int n_lsbs,
 					unsigned long *lsb_pub)
@@ -757,6 +773,7 @@ static irqreturn_t ccp5_irq_handler(int irq, void *data)
 	struct ccp_device *ccp = dev_get_drvdata(dev);
 
 	ccp5_disable_queue_interrupts(ccp);
+	ccp->total_interrupts++;
 	if (ccp->use_tasklet)
 		tasklet_schedule(&ccp->irq_tasklet);
 	else
@@ -956,6 +973,9 @@ static int ccp5_init(struct ccp_device *ccp)
 	if (ret)
 		goto e_hwrng;
 
+	/* Set up debugfs entries */
+	ccp5_debugfs_setup(ccp);
+
 	return 0;
 
 e_hwrng:
@@ -992,6 +1012,12 @@ static void ccp5_destroy(struct ccp_device *ccp)
 	/* Remove this device from the list of available units first */
 	ccp_del_device(ccp);
 
+	/* We're in the process of tearing down the entire driver;
+	 * when all the devices are gone clean up debugfs
+	 */
+	if (ccp_present())
+		ccp5_debugfs_destroy();
+
 	/* Disable and clear interrupts */
 	ccp5_disable_queue_interrupts(ccp);
 	for (i = 0; i < ccp->cmd_q_count; i++) {

+ 2 - 1
drivers/crypto/ccp/ccp-dev.c

@@ -31,8 +31,9 @@
 #include "ccp-dev.h"
 #include "ccp-dev.h"
 
 
 MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
 MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
-MODULE_VERSION("1.0.0");
+MODULE_VERSION("1.1.0");
 MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
 MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
 
 
 struct ccp_tasklet_data {
 struct ccp_tasklet_data {

+ 20 - 0
drivers/crypto/ccp/ccp-dev.h

@@ -70,6 +70,7 @@
 #define LSB_PUBLIC_MASK_HI_OFFSET	0x1C
 #define LSB_PRIVATE_MASK_LO_OFFSET	0x20
 #define LSB_PRIVATE_MASK_HI_OFFSET	0x24
+#define CMD5_PSP_CCP_VERSION		0x100
 
 #define CMD5_Q_CONTROL_BASE		0x0000
 #define CMD5_Q_TAIL_LO_BASE		0x0004
@@ -322,6 +323,16 @@ struct ccp_cmd_queue {
 	/* Interrupt wait queue */
 	wait_queue_head_t int_queue;
 	unsigned int int_rcvd;
+
+	/* Per-queue Statistics */
+	unsigned long total_ops;
+	unsigned long total_aes_ops;
+	unsigned long total_xts_aes_ops;
+	unsigned long total_3des_ops;
+	unsigned long total_sha_ops;
+	unsigned long total_rsa_ops;
+	unsigned long total_pt_ops;
+	unsigned long total_ecc_ops;
 } ____cacheline_aligned;
 
 struct ccp_device {
@@ -419,6 +430,12 @@ struct ccp_device {
 
 	/* DMA caching attribute support */
 	unsigned int axcache;
+
+	/* Device Statistics */
+	unsigned long total_interrupts;
+
+	/* DebugFS info */
+	struct dentry *debugfs_instance;
 };
 
 enum ccp_memtype {
@@ -632,6 +649,9 @@ void ccp_unregister_rng(struct ccp_device *ccp);
 int ccp_dmaengine_register(struct ccp_device *ccp);
 void ccp_dmaengine_unregister(struct ccp_device *ccp);
 
+void ccp5_debugfs_setup(struct ccp_device *ccp);
+void ccp5_debugfs_destroy(void);
+
 /* Structure for computation functions that are device-specific */
 struct ccp_actions {
 	int (*aes)(struct ccp_op *);

+ 2 - 2
drivers/crypto/ccp/ccp-platform.c

@@ -44,7 +44,7 @@ static struct ccp_vdata *ccp_get_of_version(struct platform_device *pdev)
 	if (match && match->data)
 		return (struct ccp_vdata *)match->data;
 #endif
-	return 0;
+	return NULL;
 }
 
 static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
@@ -56,7 +56,7 @@ static struct ccp_vdata *ccp_get_acpi_version(struct platform_device *pdev)
 	if (match && match->driver_data)
 		return (struct ccp_vdata *)match->driver_data;
 #endif
-	return 0;
+	return NULL;
 }
 
 static int ccp_get_irq(struct ccp_device *ccp)

+ 874 - 222
drivers/crypto/chelsio/chcr_algo.c

@@ -55,6 +55,8 @@
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/authenc.h>
+#include <crypto/ctr.h>
+#include <crypto/gf128mul.h>
 #include <crypto/internal/aead.h>
 #include <crypto/null.h>
 #include <crypto/internal/skcipher.h>
@@ -126,13 +128,13 @@ static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
 	fw6_pld = (struct cpl_fw6_pld *)input;
 	if ((get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) ||
 	    (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_GCM)) {
-		cmp = memcmp(&fw6_pld->data[2], (fw6_pld + 1), authsize);
+		cmp = crypto_memneq(&fw6_pld->data[2], (fw6_pld + 1), authsize);
 	} else {
 
 		sg_pcopy_to_buffer(req->src, sg_nents(req->src), temp,
 				authsize, req->assoclen +
 				req->cryptlen - authsize);
-		cmp = memcmp(temp, (fw6_pld + 1), authsize);
+		cmp = crypto_memneq(temp, (fw6_pld + 1), authsize);
 	}
 	if (cmp)
 		*err = -EBADMSG;
@@ -151,12 +153,12 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct chcr_req_ctx ctx_req;
-	struct cpl_fw6_pld *fw6_pld;
 	unsigned int digestsize, updated_digestsize;
+	struct adapter *adap = padap(ctx->dev);
 
 	switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_AEAD:
-		ctx_req.req.aead_req = (struct aead_request *)req;
+		ctx_req.req.aead_req = aead_request_cast(req);
 		ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req);
 		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.ctx.reqctx->dst,
 			     ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE);
@@ -164,32 +166,23 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 			kfree_skb(ctx_req.ctx.reqctx->skb);
 			ctx_req.ctx.reqctx->skb = NULL;
 		}
+		free_new_sg(ctx_req.ctx.reqctx->newdstsg);
+		ctx_req.ctx.reqctx->newdstsg = NULL;
 		if (ctx_req.ctx.reqctx->verify == VERIFY_SW) {
 			chcr_verify_tag(ctx_req.req.aead_req, input,
 					&err);
 			ctx_req.ctx.reqctx->verify = VERIFY_HW;
 		}
+		ctx_req.req.aead_req->base.complete(req, err);
 		break;
 
 	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		ctx_req.req.ablk_req = (struct ablkcipher_request *)req;
-		ctx_req.ctx.ablk_ctx =
-			ablkcipher_request_ctx(ctx_req.req.ablk_req);
-		if (!err) {
-			fw6_pld = (struct cpl_fw6_pld *)input;
-			memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2],
-			       AES_BLOCK_SIZE);
-		}
-		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst,
-			     ctx_req.ctx.ablk_ctx->dst_nents, DMA_FROM_DEVICE);
-		if (ctx_req.ctx.ablk_ctx->skb) {
-			kfree_skb(ctx_req.ctx.ablk_ctx->skb);
-			ctx_req.ctx.ablk_ctx->skb = NULL;
-		}
+		 err = chcr_handle_cipher_resp(ablkcipher_request_cast(req),
+					       input, err);
 		break;
 
 	case CRYPTO_ALG_TYPE_AHASH:
-		ctx_req.req.ahash_req = (struct ahash_request *)req;
+		ctx_req.req.ahash_req = ahash_request_cast(req);
 		ctx_req.ctx.ahash_ctx =
 			ahash_request_ctx(ctx_req.req.ahash_req);
 		digestsize =
@@ -214,8 +207,10 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 			       sizeof(struct cpl_fw6_pld),
 			       updated_digestsize);
 		}
+		ctx_req.req.ahash_req->base.complete(req, err);
 		break;
 	}
+	atomic_inc(&adap->chcr_stats.complete);
 	return err;
 }
 
@@ -392,7 +387,7 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
 			   struct phys_sge_parm *sg_param)
 {
 	struct phys_sge_pairs *to;
-	int out_buf_size = sg_param->obsize;
+	unsigned int len = 0, left_size = sg_param->obsize;
 	unsigned int nents = sg_param->nents, i, j = 0;
 
 	phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL)
@@ -409,20 +404,15 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
 	phys_cpl->rss_hdr_int.hash_val = 0;
 	to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl +
 				       sizeof(struct cpl_rx_phys_dsgl));
-
-	for (i = 0; nents; to++) {
-		for (j = 0; j < 8 && nents; j++, nents--) {
-			out_buf_size -= sg_dma_len(sg);
-			to->len[j] = htons(sg_dma_len(sg));
+	for (i = 0; nents && left_size; to++) {
+		for (j = 0; j < 8 && nents && left_size; j++, nents--) {
+			len = min(left_size, sg_dma_len(sg));
+			to->len[j] = htons(len);
 			to->addr[j] = cpu_to_be64(sg_dma_address(sg));
+			left_size -= len;
 			sg = sg_next(sg);
 		}
 	}
-	if (out_buf_size) {
-		j--;
-		to--;
-		to->len[j] = htons(ntohs(to->len[j]) + (out_buf_size));
-	}
 }
 
 static inline int map_writesg_phys_cpl(struct device *dev,
@@ -431,7 +421,7 @@ static inline int map_writesg_phys_cpl(struct device *dev,
 					struct phys_sge_parm *sg_param)
 {
 	if (!sg || !sg_param->nents)
-		return 0;
+		return -EINVAL;
 
 	sg_param->nents = dma_map_sg(dev, sg, sg_param->nents, DMA_FROM_DEVICE);
 	if (sg_param->nents == 0) {
@@ -498,6 +488,24 @@ write_sg_to_skb(struct sk_buff *skb, unsigned int *frags,
 	}
 }
 
+static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
+{
+	struct adapter *adap = netdev2adap(dev);
+	struct sge_uld_txq_info *txq_info =
+		adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
+	struct sge_uld_txq *txq;
+	int ret = 0;
+
+	local_bh_disable();
+	txq = &txq_info->uldtxq[idx];
+	spin_lock(&txq->sendq.lock);
+	if (txq->full)
+		ret = -1;
+	spin_unlock(&txq->sendq.lock);
+	local_bh_enable();
+	return ret;
+}
+
 static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
 			       struct _key_ctx *key_ctx)
 {
@@ -512,13 +520,67 @@ static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
 	}
 	return 0;
 }
+static int chcr_sg_ent_in_wr(struct scatterlist *src,
+			     struct scatterlist *dst,
+			     unsigned int minsg,
+			     unsigned int space,
+			     short int *sent,
+			     short int *dent)
+{
+	int srclen = 0, dstlen = 0;
+	int srcsg = minsg, dstsg = 0;
+
+	*sent = 0;
+	*dent = 0;
+	while (src && dst && ((srcsg + 1) <= MAX_SKB_FRAGS) &&
+	       space > (sgl_ent_len[srcsg + 1] + dsgl_ent_len[dstsg])) {
+		srclen += src->length;
+		srcsg++;
+		while (dst && ((dstsg + 1) <= MAX_DSGL_ENT) &&
+		       space > (sgl_ent_len[srcsg] + dsgl_ent_len[dstsg + 1])) {
+			if (srclen <= dstlen)
+				break;
+			dstlen += dst->length;
+			dst = sg_next(dst);
+			dstsg++;
+		}
+		src = sg_next(src);
+	}
+	*sent = srcsg - minsg;
+	*dent = dstsg;
+	return min(srclen, dstlen);
+}
+
+static int chcr_cipher_fallback(struct crypto_skcipher *cipher,
+				u32 flags,
+				struct scatterlist *src,
+				struct scatterlist *dst,
+				unsigned int nbytes,
+				u8 *iv,
+				unsigned short op_type)
+{
+	int err;
 
+	SKCIPHER_REQUEST_ON_STACK(subreq, cipher);
+	skcipher_request_set_tfm(subreq, cipher);
+	skcipher_request_set_callback(subreq, flags, NULL, NULL);
+	skcipher_request_set_crypt(subreq, src, dst,
+				   nbytes, iv);
+
+	err = op_type ? crypto_skcipher_decrypt(subreq) :
+		crypto_skcipher_encrypt(subreq);
+	skcipher_request_zero(subreq);
+
+	return err;
+
+}
 static inline void create_wreq(struct chcr_context *ctx,
 			       struct chcr_wr *chcr_req,
 			       void *req, struct sk_buff *skb,
 			       int kctx_len, int hash_sz,
 			       int is_iv,
-			       unsigned int sc_len)
+			       unsigned int sc_len,
+			       unsigned int lcb)
 {
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	int iv_loc = IV_DSGL;
@@ -543,7 +605,8 @@ static inline void create_wreq(struct chcr_context *ctx,
 	chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req);
 	chcr_req->wreq.rx_chid_to_rx_q_id =
 		FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid,
-				is_iv ? iv_loc : IV_NOP, ctx->tx_qidx);
+				is_iv ? iv_loc : IV_NOP, !!lcb,
+				ctx->tx_qidx);
 
 	chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id,
 						       qid);
@@ -563,69 +626,62 @@ static inline void create_wreq(struct chcr_context *ctx,
  *	@qid: ingress qid where response of this WR should be received.
  *	@op_type:	encryption or decryption
  */
-static struct sk_buff
-*create_cipher_wr(struct ablkcipher_request *req,
-		  unsigned short qid,
-		  unsigned short op_type)
+static struct sk_buff *create_cipher_wr(struct cipher_wr_param *wrparam)
 {
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	struct sk_buff *skb = NULL;
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_blkcipher_req_ctx *reqctx =
+		ablkcipher_request_ctx(wrparam->req);
 	struct phys_sge_parm sg_param;
 	unsigned int frags = 0, transhdr_len, phys_dsgl;
-	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len;
-	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
-			GFP_ATOMIC;
-
-	if (!req->info)
-		return ERR_PTR(-EINVAL);
-	reqctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes);
-	if (reqctx->dst_nents <= 0) {
-		pr_err("AES:Invalid Destination sg lists\n");
-		return ERR_PTR(-EINVAL);
-	}
-	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
-	    (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) {
-		pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
-		       ablkctx->enckey_len, req->nbytes, ivsize);
-		return ERR_PTR(-EINVAL);
-	}
+	int error;
+	unsigned int ivsize = AES_BLOCK_SIZE, kctx_len;
+	gfp_t flags = wrparam->req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+			GFP_KERNEL : GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 	phys_dsgl = get_space_for_phys_dsgl(reqctx->dst_nents);
 
 	kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
-	if (!skb)
-		return ERR_PTR(-ENOMEM);
+	if (!skb) {
+		error = -ENOMEM;
+		goto err;
+	}
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 	chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 	chcr_req->sec_cpl.op_ivinsrtofst =
 		FILL_SEC_CPL_OP_IVINSR(ctx->dev->rx_channel_id, 2, 1);
 
-	chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes);
+	chcr_req->sec_cpl.pldlen = htonl(ivsize + wrparam->bytes);
 	chcr_req->sec_cpl.aadstart_cipherstop_hi =
 			FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, ivsize + 1, 0);
 
 	chcr_req->sec_cpl.cipherstop_lo_authinsert =
 			FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0);
-	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0,
+	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(reqctx->op, 0,
 							 ablkctx->ciph_mode,
 							 0, 0, ivsize >> 1);
 	chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
 							  0, 1, phys_dsgl);
 
 	chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr;
-	if (op_type == CHCR_DECRYPT_OP) {
+	if ((reqctx->op == CHCR_DECRYPT_OP) &&
+	    (!(get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	       CRYPTO_ALG_SUB_TYPE_CTR)) &&
+	    (!(get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	       CRYPTO_ALG_SUB_TYPE_CTR_RFC3686))) {
 		generate_copy_rrkey(ablkctx, &chcr_req->key_ctx);
 	} else {
-		if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
+		if ((ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) ||
+		    (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CTR)) {
 			memcpy(chcr_req->key_ctx.key, ablkctx->key,
 			       ablkctx->enckey_len);
 		} else {
@@ -640,45 +696,80 @@ static struct sk_buff
 	}
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	sg_param.nents = reqctx->dst_nents;
-	sg_param.obsize = req->nbytes;
-	sg_param.qid = qid;
-	sg_param.align = 1;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst,
-				 &sg_param))
+	sg_param.obsize =  wrparam->bytes;
+	sg_param.qid = wrparam->qid;
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+				       reqctx->dst, &sg_param);
+	if (error)
 		goto map_fail1;
 
 	skb_set_transport_header(skb, transhdr_len);
-	memcpy(reqctx->iv, req->info, ivsize);
 	write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
-	write_sg_to_skb(skb, &frags, req->src, req->nbytes);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1,
-			sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl);
+	write_sg_to_skb(skb, &frags, wrparam->srcsg, wrparam->bytes);
+	atomic_inc(&adap->chcr_stats.cipher_rqst);
+	create_wreq(ctx, chcr_req, &(wrparam->req->base), skb, kctx_len, 0, 1,
+			sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl,
+			ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC);
 	reqctx->skb = skb;
 	skb_get(skb);
 	return skb;
 map_fail1:
 	kfree_skb(skb);
-	return ERR_PTR(-ENOMEM);
+err:
+	return ERR_PTR(error);
 }
 
-static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static inline int chcr_keyctx_ck_size(unsigned int keylen)
+{
+	int ck_size = 0;
+
+	if (keylen == AES_KEYSIZE_128)
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+	else if (keylen == AES_KEYSIZE_192)
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+	else if (keylen == AES_KEYSIZE_256)
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+	else
+		ck_size = 0;
+
+	return ck_size;
+}
+static int chcr_cipher_fallback_setkey(struct crypto_ablkcipher *cipher,
+				       const u8 *key,
+				       unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	int err = 0;
+
+	crypto_skcipher_clear_flags(ablkctx->sw_cipher, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(ablkctx->sw_cipher, cipher->base.crt_flags &
+				  CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(ablkctx->sw_cipher, key, keylen);
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+	tfm->crt_flags |=
+		crypto_skcipher_get_flags(ablkctx->sw_cipher) &
+		CRYPTO_TFM_RES_MASK;
+	return err;
+}
+
+static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *cipher,
+			       const u8 *key,
 			       unsigned int keylen)
 			       unsigned int keylen)
 {
 {
-	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	unsigned int ck_size, context_size;
 	unsigned int ck_size, context_size;
 	u16 alignment = 0;
 	u16 alignment = 0;
+	int err;
 
 
-	if (keylen == AES_KEYSIZE_128) {
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
-	} else if (keylen == AES_KEYSIZE_192) {
-		alignment = 8;
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
-	} else if (keylen == AES_KEYSIZE_256) {
-		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
-	} else {
+	err = chcr_cipher_fallback_setkey(cipher, key, keylen);
+	if (err)
 		goto badkey_err;
 		goto badkey_err;
-	}
+
+	ck_size = chcr_keyctx_ck_size(keylen);
+	alignment = ck_size == CHCR_KEYCTX_CIPHER_KEY_SIZE_192 ? 8 : 0;
 	memcpy(ablkctx->key, key, keylen);
 	memcpy(ablkctx->key, key, keylen);
 	ablkctx->enckey_len = keylen;
 	ablkctx->enckey_len = keylen;
 	get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, keylen << 3);
 	get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, keylen << 3);
@@ -690,35 +781,398 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC;
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC;
 	return 0;
 	return 0;
 badkey_err:
 badkey_err:
-	crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	ablkctx->enckey_len = 0;
 	ablkctx->enckey_len = 0;
-	return -EINVAL;
+
+	return err;
 }
 }
 
 
-static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
+static int chcr_aes_ctr_setkey(struct crypto_ablkcipher *cipher,
+				   const u8 *key,
+				   unsigned int keylen)
 {
 {
-	struct adapter *adap = netdev2adap(dev);
-	struct sge_uld_txq_info *txq_info =
-		adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
-	struct sge_uld_txq *txq;
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	unsigned int ck_size, context_size;
+	u16 alignment = 0;
+	int err;
+
+	err = chcr_cipher_fallback_setkey(cipher, key, keylen);
+	if (err)
+		goto badkey_err;
+	ck_size = chcr_keyctx_ck_size(keylen);
+	alignment = (ck_size == CHCR_KEYCTX_CIPHER_KEY_SIZE_192) ? 8 : 0;
+	memcpy(ablkctx->key, key, keylen);
+	ablkctx->enckey_len = keylen;
+	context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
+			keylen + alignment) >> 4;
+
+	ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY,
+						0, 0, context_size);
+	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CTR;
+
+	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;
+
+	return err;
+}
+
+static int chcr_aes_rfc3686_setkey(struct crypto_ablkcipher *cipher,
+				   const u8 *key,
+				   unsigned int keylen)
+{
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	unsigned int ck_size, context_size;
+	u16 alignment = 0;
+	int err;
+
+	if (keylen < CTR_RFC3686_NONCE_SIZE)
+		return -EINVAL;
+	memcpy(ablkctx->nonce, key + (keylen - CTR_RFC3686_NONCE_SIZE),
+	       CTR_RFC3686_NONCE_SIZE);
+
+	keylen -= CTR_RFC3686_NONCE_SIZE;
+	err = chcr_cipher_fallback_setkey(cipher, key, keylen);
+	if (err)
+		goto badkey_err;
+
+	ck_size = chcr_keyctx_ck_size(keylen);
+	alignment = (ck_size == CHCR_KEYCTX_CIPHER_KEY_SIZE_192) ? 8 : 0;
+	memcpy(ablkctx->key, key, keylen);
+	ablkctx->enckey_len = keylen;
+	context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
+			keylen + alignment) >> 4;
+
+	ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY,
+						0, 0, context_size);
+	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CTR;
+
+	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;
+
+	return err;
+}
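The CBC, CTR and RFC3686 setkey paths above share the same key-context sizing; reduced to a standalone sketch it looks like the helper below (KEY_CONTEXT_HDR_SALT_AND_PAD's value lives elsewhere in the driver and is passed in as a parameter here; the helper name is illustrative, not the driver's):

/* Key-context length in 16-byte units, as consumed by FILL_KEY_CTX_HDR().
 * AES-192 keys (24 bytes) get 8 bytes of padding so the context stays
 * 16-byte aligned; 128- and 256-bit keys already are.
 */
static unsigned int key_ctx_len16(unsigned int hdr_salt_pad, unsigned int keylen)
{
	unsigned int alignment = (keylen == 24) ? 8 : 0;

	return (hdr_salt_pad + keylen + alignment) >> 4;
}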
+static void ctr_add_iv(u8 *dstiv, u8 *srciv, u32 add)
+{
+	unsigned int size = AES_BLOCK_SIZE;
+	__be32 *b = (__be32 *)(dstiv + size);
+	u32 c, prev;
+
+	memcpy(dstiv, srciv, AES_BLOCK_SIZE);
+	for (; size >= 4; size -= 4) {
+		prev = be32_to_cpu(*--b);
+		c = prev + add;
+		*b = cpu_to_be32(c);
+		if (prev < c)
+			break;
+		add = 1;
+	}
+
+}
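A minimal userspace rendering of the big-endian counter addition that ctr_add_iv() performs, assuming the usual ntohl()/htonl() helpers (function and variable names here are illustrative):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohl()/htonl() */

/* Add 'add' to a 128-bit big-endian counter, propagating the carry from the
 * least-significant 32-bit word upwards, mirroring ctr_add_iv() above.
 */
static void be128_add(uint8_t dst[16], const uint8_t src[16], uint32_t add)
{
	uint32_t words[4];
	int i;

	memcpy(words, src, sizeof(words));
	for (i = 3; i >= 0; i--) {
		uint32_t prev = ntohl(words[i]);
		uint32_t cur = prev + add;

		words[i] = htonl(cur);
		if (cur >= prev)	/* no wrap-around: done */
			break;
		add = 1;		/* carry into the next word */
	}
	memcpy(dst, words, sizeof(words));
}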
+
+static unsigned int adjust_ctr_overflow(u8 *iv, u32 bytes)
+{
+	__be32 *b = (__be32 *)(iv + AES_BLOCK_SIZE);
+	u64 c;
+	u32 temp = be32_to_cpu(*--b);
+
+	temp = ~temp;
+	c = (u64)temp + 1; /* blocks we can process before the counter overflows */
+	if ((bytes / AES_BLOCK_SIZE) > c)
+		bytes = c * AES_BLOCK_SIZE;
+	return bytes;
+}
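The clamping in adjust_ctr_overflow() amounts to the arithmetic below, where 16 stands in for AES_BLOCK_SIZE and the helper name is illustrative. For example, with the low counter word at 0xFFFFFFFE only two more blocks fit before the 32-bit counter wraps, so at most 32 bytes go into this request and the rest is issued later with a fresh IV.

#include <stdint.h>

static uint32_t clamp_to_ctr_wrap(uint32_t counter, uint32_t bytes)
{
	/* blocks that can still be processed before the counter rolls over */
	uint64_t blocks_left = (uint64_t)(~counter) + 1;

	if (bytes / 16 > blocks_left)
		bytes = (uint32_t)(blocks_left * 16);
	return bytes;
}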
+
+static int chcr_update_tweak(struct ablkcipher_request *req, u8 *iv)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct crypto_cipher *cipher;
+	int ret, i;
+	u8 *key;
+	unsigned int keylen;
+
+	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
+	memcpy(iv, req->info, AES_BLOCK_SIZE);
+
+	if (IS_ERR(cipher)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	keylen = ablkctx->enckey_len / 2;
+	key = ablkctx->key + keylen;
+	ret = crypto_cipher_setkey(cipher, key, keylen);
+	if (ret)
+		goto out1;
+
+	crypto_cipher_encrypt_one(cipher, iv, iv);
+	for (i = 0; i < (reqctx->processed / AES_BLOCK_SIZE); i++)
+		gf128mul_x_ble((le128 *)iv, (le128 *)iv);
+
+	crypto_cipher_decrypt_one(cipher, iv, iv);
+out1:
+	crypto_free_cipher(cipher);
+out:
+	return ret;
+}
+
+static int chcr_update_cipher_iv(struct ablkcipher_request *req,
+				   struct cpl_fw6_pld *fw6_pld, u8 *iv)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	int subtype = get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm));
 	int ret = 0;
 	int ret = 0;
 
 
-	local_bh_disable();
-	txq = &txq_info->uldtxq[idx];
-	spin_lock(&txq->sendq.lock);
-	if (txq->full)
-		ret = -1;
-	spin_unlock(&txq->sendq.lock);
-	local_bh_enable();
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
+		ctr_add_iv(iv, req->info, (reqctx->processed /
+			   AES_BLOCK_SIZE));
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_RFC3686)
+		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
+			CTR_RFC3686_IV_SIZE) = cpu_to_be32((reqctx->processed /
+						AES_BLOCK_SIZE) + 1);
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
+		ret = chcr_update_tweak(req, iv);
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
+		if (reqctx->op)
+			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
+					   16,
+					   reqctx->processed - AES_BLOCK_SIZE);
+		else
+			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
+	}
+
 	return ret;
 	return ret;
+
 }
 }
 
 
-static int chcr_aes_encrypt(struct ablkcipher_request *req)
+/* We need a separate function for the final IV because in RFC3686 the
+ * initial counter starts from 1 and the IV buffer is only 8 bytes, which
+ * stays constant across subsequent update requests.
+ */
+
+static int chcr_final_cipher_iv(struct ablkcipher_request *req,
+				   struct cpl_fw6_pld *fw6_pld, u8 *iv)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	int subtype = get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm));
+	int ret = 0;
+
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR)
+		ctr_add_iv(iv, req->info, (reqctx->processed /
+			   AES_BLOCK_SIZE));
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_XTS)
+		ret = chcr_update_tweak(req, iv);
+	else if (subtype == CRYPTO_ALG_SUB_TYPE_CBC) {
+		if (reqctx->op)
+			sg_pcopy_to_buffer(req->src, sg_nents(req->src), iv,
+					   16,
+					   reqctx->processed - AES_BLOCK_SIZE);
+		else
+			memcpy(iv, &fw6_pld->data[2], AES_BLOCK_SIZE);
+
+	}
+	return ret;
+
+}
+
+
+static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+				   unsigned char *input, int err)
 {
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	struct sk_buff *skb;
 	struct sk_buff *skb;
+	struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input;
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct  cipher_wr_param wrparam;
+	int bytes;
+
+	dma_unmap_sg(&u_ctx->lldi.pdev->dev, reqctx->dst, reqctx->dst_nents,
+		     DMA_FROM_DEVICE);
+
+	if (reqctx->skb) {
+		kfree_skb(reqctx->skb);
+		reqctx->skb = NULL;
+	}
+	if (err)
+		goto complete;
+
+	if (req->nbytes == reqctx->processed) {
+		err = chcr_final_cipher_iv(req, fw6_pld, req->info);
+		goto complete;
+	}
+
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_qidx))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+			err = -EBUSY;
+			goto complete;
+		}
+
+	}
+	wrparam.srcsg = scatterwalk_ffwd(reqctx->srcffwd, req->src,
+				       reqctx->processed);
+	reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, reqctx->dstsg,
+					 reqctx->processed);
+	if (!wrparam.srcsg || !reqctx->dst) {
+		pr_err("Input sg list length less that nbytes\n");
+		err = -EINVAL;
+		goto complete;
+	}
+	bytes = chcr_sg_ent_in_wr(wrparam.srcsg, reqctx->dst, 1,
+				 SPACE_LEFT(ablkctx->enckey_len),
+				 &wrparam.snent, &reqctx->dst_nents);
+	if ((bytes + reqctx->processed) >= req->nbytes)
+		bytes  = req->nbytes - reqctx->processed;
+	else
+		bytes = ROUND_16(bytes);
+	err = chcr_update_cipher_iv(req, fw6_pld, reqctx->iv);
+	if (err)
+		goto complete;
+
+	if (unlikely(bytes == 0)) {
+		err = chcr_cipher_fallback(ablkctx->sw_cipher,
+				     req->base.flags,
+				     wrparam.srcsg,
+				     reqctx->dst,
+				     req->nbytes - reqctx->processed,
+				     reqctx->iv,
+				     reqctx->op);
+		goto complete;
+	}
+
+	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	    CRYPTO_ALG_SUB_TYPE_CTR)
+		bytes = adjust_ctr_overflow(reqctx->iv, bytes);
+	reqctx->processed += bytes;
+	wrparam.qid = u_ctx->lldi.rxq_ids[ctx->rx_qidx];
+	wrparam.req = req;
+	wrparam.bytes = bytes;
+	skb = create_cipher_wr(&wrparam);
+	if (IS_ERR(skb)) {
+		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+		err = PTR_ERR(skb);
+		goto complete;
+	}
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
+	chcr_send_wr(skb);
+	return 0;
+complete:
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	req->base.complete(&req->base, err);
+	return err;
+}
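Both process_cipher() and chcr_handle_cipher_resp() size each work request with the same arithmetic, sketched below; fits_in_wr stands for the byte count returned by chcr_sg_ent_in_wr(), and the mask is ROUND_16() from chcr_algo.h further down. When this evaluates to zero the driver hands the remainder to the software fallback instead of building a WR.

static unsigned int wr_chunk_bytes(unsigned int fits_in_wr,
				   unsigned int processed,
				   unsigned int total)
{
	if (fits_in_wr + processed >= total)
		return total - processed;	/* final chunk: take whatever is left */
	return fits_in_wr & 0xFFFFFFF0;		/* ROUND_16(): only whole AES blocks */
}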
+
+static int process_cipher(struct ablkcipher_request *req,
+				  unsigned short qid,
+				  struct sk_buff **skb,
+				  unsigned short op_type)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
+	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct	cipher_wr_param wrparam;
+	int bytes, nents, err = -EINVAL;
+
+	reqctx->newdstsg = NULL;
+	reqctx->processed = 0;
+	if (!req->info)
+		goto error;
+	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
+	    (req->nbytes == 0) ||
+	    (req->nbytes % crypto_ablkcipher_blocksize(tfm))) {
+		pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
+		       ablkctx->enckey_len, req->nbytes, ivsize);
+		goto error;
+	}
+	wrparam.srcsg = req->src;
+	if (is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return PTR_ERR(reqctx->newdstsg);
+		reqctx->dstsg = reqctx->newdstsg;
+	} else {
+		reqctx->dstsg = req->dst;
+	}
+	bytes = chcr_sg_ent_in_wr(wrparam.srcsg, reqctx->dstsg, MIN_CIPHER_SG,
+				 SPACE_LEFT(ablkctx->enckey_len),
+				 &wrparam.snent,
+				 &reqctx->dst_nents);
+	if ((bytes + reqctx->processed) >= req->nbytes)
+		bytes  = req->nbytes - reqctx->processed;
+	else
+		bytes = ROUND_16(bytes);
+	if (unlikely(bytes > req->nbytes))
+		bytes = req->nbytes;
+	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+				  CRYPTO_ALG_SUB_TYPE_CTR) {
+		bytes = adjust_ctr_overflow(req->info, bytes);
+	}
+	if (get_cryptoalg_subtype(crypto_ablkcipher_tfm(tfm)) ==
+	    CRYPTO_ALG_SUB_TYPE_CTR_RFC3686) {
+		memcpy(reqctx->iv, ablkctx->nonce, CTR_RFC3686_NONCE_SIZE);
+		memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->info,
+				CTR_RFC3686_IV_SIZE);
+
+		/* initialize counter portion of counter block */
+		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
+			CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);
+
+	} else {
+
+		memcpy(reqctx->iv, req->info, ivsize);
+	}
+	if (unlikely(bytes == 0)) {
+		err = chcr_cipher_fallback(ablkctx->sw_cipher,
+					   req->base.flags,
+					   req->src,
+					   req->dst,
+					   req->nbytes,
+					   req->info,
+					   op_type);
+		goto error;
+	}
+	reqctx->processed = bytes;
+	reqctx->dst = reqctx->dstsg;
+	reqctx->op = op_type;
+	wrparam.qid = qid;
+	wrparam.req = req;
+	wrparam.bytes = bytes;
+	*skb = create_cipher_wr(&wrparam);
+	if (IS_ERR(*skb)) {
+		err = PTR_ERR(*skb);
+		goto error;
+	}
+
+	return 0;
+error:
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return err;
+}
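The rfc3686 branch above assembles the 16-byte counter block as nonce || IV || counter, with the counter starting at 1. A standalone sketch, with field sizes mirroring CTR_RFC3686_NONCE_SIZE and CTR_RFC3686_IV_SIZE and an illustrative function name:

#include <stdint.h>
#include <string.h>

static void rfc3686_counter_block(uint8_t block[16],
				  const uint8_t nonce[4], const uint8_t iv[8])
{
	static const uint8_t one_be[4] = { 0, 0, 0, 1 };

	memcpy(block, nonce, 4);		/* bytes  0..3:  nonce          */
	memcpy(block + 4, iv, 8);		/* bytes  4..11: per-request IV */
	memcpy(block + 12, one_be, 4);		/* bytes 12..15: counter = 1    */
}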
+
+static int chcr_aes_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct sk_buff *skb = NULL;
+	int err;
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    ctx->tx_qidx))) {
 					    ctx->tx_qidx))) {
@@ -726,12 +1180,10 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req)
 			return -EBUSY;
 			return -EBUSY;
 	}
 	}
 
 
-	skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx],
+	err = process_cipher(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx], &skb,
 			       CHCR_ENCRYPT_OP);
 			       CHCR_ENCRYPT_OP);
-	if (IS_ERR(skb)) {
-		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
-		return  PTR_ERR(skb);
-	}
+	if (err || !skb)
+		return  err;
 	skb->dev = u_ctx->lldi.ports[0];
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
 	chcr_send_wr(skb);
 	chcr_send_wr(skb);
@@ -743,7 +1195,8 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
+	int err;
 
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    ctx->tx_qidx))) {
 					    ctx->tx_qidx))) {
@@ -751,12 +1204,10 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 			return -EBUSY;
 			return -EBUSY;
 	}
 	}
 
 
-	skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx],
+	err = process_cipher(req, u_ctx->lldi.rxq_ids[ctx->rx_qidx], &skb,
 			       CHCR_DECRYPT_OP);
 			       CHCR_DECRYPT_OP);
-	if (IS_ERR(skb)) {
-		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
-		return PTR_ERR(skb);
-	}
+	if (err || !skb)
+		return err;
 	skb->dev = u_ctx->lldi.ports[0];
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_qidx);
 	chcr_send_wr(skb);
 	chcr_send_wr(skb);
@@ -765,7 +1216,7 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 
 
 static int chcr_device_init(struct chcr_context *ctx)
 static int chcr_device_init(struct chcr_context *ctx)
 {
 {
-	struct uld_ctx *u_ctx;
+	struct uld_ctx *u_ctx = NULL;
 	struct adapter *adap;
 	struct adapter *adap;
 	unsigned int id;
 	unsigned int id;
 	int txq_perchan, txq_idx, ntxq;
 	int txq_perchan, txq_idx, ntxq;
@@ -773,12 +1224,12 @@ static int chcr_device_init(struct chcr_context *ctx)
 
 
 	id = smp_processor_id();
 	id = smp_processor_id();
 	if (!ctx->dev) {
 	if (!ctx->dev) {
-		err = assign_chcr_device(&ctx->dev);
-		if (err) {
+		u_ctx = assign_chcr_device();
+		if (!u_ctx) {
 			pr_err("chcr device assignment fails\n");
 			pr_err("chcr device assignment fails\n");
 			goto out;
 			goto out;
 		}
 		}
-		u_ctx = ULD_CTX(ctx);
+		ctx->dev = u_ctx->dev;
 		adap = padap(ctx->dev);
 		adap = padap(ctx->dev);
 		ntxq = min_not_zero((unsigned int)u_ctx->lldi.nrxq,
 		ntxq = min_not_zero((unsigned int)u_ctx->lldi.nrxq,
 				    adap->vres.ncrypto_fc);
 				    adap->vres.ncrypto_fc);
@@ -801,10 +1252,48 @@ out:
 
 
 static int chcr_cra_init(struct crypto_tfm *tfm)
 static int chcr_cra_init(struct crypto_tfm *tfm)
 {
 {
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+
+	ablkctx->sw_cipher = crypto_alloc_skcipher(alg->cra_name, 0,
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ablkctx->sw_cipher)) {
+		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
+		return PTR_ERR(ablkctx->sw_cipher);
+	}
+	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
+	return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static int chcr_rfc3686_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+
+	/* RFC3686 initialises the IV counter value to 1; rfc3686(ctr(aes))
+	 * cannot be used as the fallback in chcr_handle_cipher_resp()
+	 */
+	ablkctx->sw_cipher = crypto_alloc_skcipher("ctr(aes)", 0,
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ablkctx->sw_cipher)) {
+		pr_err("failed to allocate fallback for %s\n", alg->cra_name);
+		return PTR_ERR(ablkctx->sw_cipher);
+	}
 	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
 	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
 	return chcr_device_init(crypto_tfm_ctx(tfm));
 	return chcr_device_init(crypto_tfm_ctx(tfm));
 }
 }
 
 
+
+static void chcr_cra_exit(struct crypto_tfm *tfm)
+{
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+
+	crypto_free_skcipher(ablkctx->sw_cipher);
+}
+
 static int get_alg_config(struct algo_param *params,
 static int get_alg_config(struct algo_param *params,
 			  unsigned int auth_size)
 			  unsigned int auth_size)
 {
 {
@@ -865,6 +1354,7 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 	u8 hash_size_in_response = 0;
 	u8 hash_size_in_response = 0;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 
 	iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
 	iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
 	kctx_len = param->alg_prm.result_size + iopad_alignment;
 	kctx_len = param->alg_prm.result_size + iopad_alignment;
@@ -921,9 +1411,9 @@ static struct sk_buff *create_hash_wr(struct ahash_request *req,
 				    param->bfr_len);
 				    param->bfr_len);
 	if (param->sg_len != 0)
 	if (param->sg_len != 0)
 		write_sg_to_skb(skb, &frags, req->src, param->sg_len);
 		write_sg_to_skb(skb, &frags, req->src, param->sg_len);
-
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, hash_size_in_response, 0,
-			DUMMY_BYTES);
+	atomic_inc(&adap->chcr_stats.digest_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len,
+		    hash_size_in_response, 0, DUMMY_BYTES, 0);
 	req_ctx->skb = skb;
 	req_ctx->skb = skb;
 	skb_get(skb);
 	skb_get(skb);
 	return skb;
 	return skb;
@@ -1226,21 +1716,17 @@ out:
 	return err;
 	return err;
 }
 }
 
 
-static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+static int chcr_aes_xts_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 			       unsigned int key_len)
 			       unsigned int key_len)
 {
 {
-	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(cipher);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 	unsigned short context_size = 0;
 	unsigned short context_size = 0;
+	int err;
 
 
-	if ((key_len != (AES_KEYSIZE_128 << 1)) &&
-	    (key_len != (AES_KEYSIZE_256 << 1))) {
-		crypto_tfm_set_flags((struct crypto_tfm *)tfm,
-				     CRYPTO_TFM_RES_BAD_KEY_LEN);
-		ablkctx->enckey_len = 0;
-		return -EINVAL;
-
-	}
+	err = chcr_cipher_fallback_setkey(cipher, key, key_len);
+	if (err)
+		goto badkey_err;
 
 
 	memcpy(ablkctx->key, key, key_len);
 	memcpy(ablkctx->key, key, key_len);
 	ablkctx->enckey_len = key_len;
 	ablkctx->enckey_len = key_len;
@@ -1254,6 +1740,11 @@ static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 				 0, context_size);
 				 0, context_size);
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
 	return 0;
 	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;
+
+	return err;
 }
 }
 
 
 static int chcr_sha_init(struct ahash_request *areq)
 static int chcr_sha_init(struct ahash_request *areq)
@@ -1330,6 +1821,63 @@ static void chcr_hmac_cra_exit(struct crypto_tfm *tfm)
 	}
 	}
 }
 }
 
 
+static int is_newsg(struct scatterlist *sgl, unsigned int *newents)
+{
+	int nents = 0;
+	int ret = 0;
+
+	while (sgl) {
+		if (sgl->length > CHCR_SG_SIZE)
+			ret = 1;
+		nents += DIV_ROUND_UP(sgl->length, CHCR_SG_SIZE);
+		sgl = sg_next(sgl);
+	}
+	*newents = nents;
+	return ret;
+}
+
+static inline void free_new_sg(struct scatterlist *sgl)
+{
+	kfree(sgl);
+}
+
+static struct scatterlist *alloc_new_sg(struct scatterlist *sgl,
+				       unsigned int nents)
+{
+	struct scatterlist *newsg, *sg;
+	int i, len, processed = 0;
+	struct page *spage;
+	int offset;
+
+	newsg = kmalloc_array(nents, sizeof(struct scatterlist), GFP_KERNEL);
+	if (!newsg)
+		return ERR_PTR(-ENOMEM);
+	sg = newsg;
+	sg_init_table(sg, nents);
+	offset = sgl->offset;
+	spage = sg_page(sgl);
+	for (i = 0; i < nents; i++) {
+		len = min_t(u32, sgl->length - processed, CHCR_SG_SIZE);
+		sg_set_page(sg, spage, len, offset);
+		processed += len;
+		offset += len;
+		if (offset >= PAGE_SIZE) {
+			offset = offset % PAGE_SIZE;
+			spage++;
+		}
+		if (processed == sgl->length) {
+			processed = 0;
+			sgl = sg_next(sgl);
+			if (!sgl)
+				break;
+			spage = sg_page(sgl);
+			offset = sgl->offset;
+		}
+		sg = sg_next(sg);
+	}
+	return newsg;
+}
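is_newsg() flags any scatterlist entry longer than CHCR_SG_SIZE (2048 bytes, see chcr_crypto.h below) and alloc_new_sg() then rebuilds the list in pieces no larger than that. The entry count the two functions agree on reduces to the sum below, shown over plain lengths instead of a scatterlist (names illustrative):

#include <stddef.h>

#define SG_CHUNK	2048	/* CHCR_SG_SIZE */

static int count_split_entries(const unsigned int *lengths, size_t n,
			       unsigned int *newents)
{
	unsigned int nents = 0;
	int needs_split = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		if (lengths[i] > SG_CHUNK)
			needs_split = 1;
		nents += (lengths[i] + SG_CHUNK - 1) / SG_CHUNK; /* DIV_ROUND_UP */
	}
	*newents = nents;
	return needs_split;
}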
+
 static int chcr_copy_assoc(struct aead_request *req,
 static int chcr_copy_assoc(struct aead_request *req,
 				struct chcr_aead_ctx *ctx)
 				struct chcr_aead_ctx *ctx)
 {
 {
@@ -1392,16 +1940,20 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	struct scatterlist *src;
 	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0;
 	unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0;
-	unsigned int   kctx_len = 0;
+	unsigned int   kctx_len = 0, nents;
 	unsigned short stop_offset = 0;
 	unsigned short stop_offset = 0;
 	unsigned int  assoclen = req->assoclen;
 	unsigned int  assoclen = req->assoclen;
 	unsigned int  authsize = crypto_aead_authsize(tfm);
 	unsigned int  authsize = crypto_aead_authsize(tfm);
-	int err = -EINVAL, src_nent;
+	int error = -EINVAL, src_nent;
 	int null = 0;
 	int null = 0;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 
-	if (aeadctx->enckey_len == 0 || (req->cryptlen == 0))
+	reqctx->newdstsg = NULL;
+	dst_size = req->assoclen + req->cryptlen + (op_type ? -authsize :
+						   authsize);
+	if (aeadctx->enckey_len == 0 || (req->cryptlen <= 0))
 		goto err;
 		goto err;
 
 
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
@@ -1410,14 +1962,24 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	if (src_nent < 0)
 	if (src_nent < 0)
 		goto err;
 		goto err;
 	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
 	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
-	reqctx->dst = src;
 
 
 	if (req->src != req->dst) {
 	if (req->src != req->dst) {
-		err = chcr_copy_assoc(req, aeadctx);
-		if (err)
-			return ERR_PTR(err);
-		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
-					       req->assoclen);
+		error = chcr_copy_assoc(req, aeadctx);
+		if (error)
+			return ERR_PTR(error);
+	}
+	if (dst_size && is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return ERR_CAST(reqctx->newdstsg);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+					       reqctx->newdstsg, req->assoclen);
+	} else {
+		if (req->src == req->dst)
+			reqctx->dst = src;
+		else
+			reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+						       req->dst, req->assoclen);
 	}
 	}
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) {
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) {
 		null = 1;
 		null = 1;
@@ -1427,6 +1989,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 					     (op_type ? -authsize : authsize));
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents < 0) {
 	if (reqctx->dst_nents < 0) {
 		pr_err("AUTHENC:Invalid Destination sg entries\n");
 		pr_err("AUTHENC:Invalid Destination sg entries\n");
+		error = -EINVAL;
 		goto err;
 		goto err;
 	}
 	}
 	dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
 	dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
@@ -1437,11 +2000,16 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 			T6_MAX_AAD_SIZE,
 			T6_MAX_AAD_SIZE,
 			transhdr_len + (sgl_len(src_nent + MIN_AUTH_SG) * 8),
 			transhdr_len + (sgl_len(src_nent + MIN_AUTH_SG) * 8),
 				op_type)) {
 				op_type)) {
+		atomic_inc(&adap->chcr_stats.fallback);
+		free_new_sg(reqctx->newdstsg);
+		reqctx->newdstsg = NULL;
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 	}
 	}
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
-	if (!skb)
+	if (!skb) {
+		error = -ENOMEM;
 		goto err;
 		goto err;
+	}
 
 
 	/* LLD is going to write the sge hdr. */
 	/* LLD is going to write the sge hdr. */
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
@@ -1493,9 +2061,9 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
 	sg_param.qid = qid;
-	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
-				  &sg_param))
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+					reqctx->dst, &sg_param);
+	if (error)
 		goto dstmap_fail;
 		goto dstmap_fail;
 
 
 	skb_set_transport_header(skb, transhdr_len);
 	skb_set_transport_header(skb, transhdr_len);
@@ -1507,8 +2075,9 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	}
 	}
 	write_buffer_to_skb(skb, &frags, req->iv, ivsize);
 	write_buffer_to_skb(skb, &frags, req->iv, ivsize);
 	write_sg_to_skb(skb, &frags, src, req->cryptlen);
 	write_sg_to_skb(skb, &frags, src, req->cryptlen);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
-		   sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+	atomic_inc(&adap->chcr_stats.cipher_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len, size, 1,
+		   sizeof(struct cpl_rx_phys_dsgl) + dst_size, 0);
 	reqctx->skb = skb;
 	reqctx->skb = skb;
 	skb_get(skb);
 	skb_get(skb);
 
 
@@ -1517,7 +2086,9 @@ dstmap_fail:
 	/* ivmap_fail: */
 	/* ivmap_fail: */
 	kfree_skb(skb);
 	kfree_skb(skb);
 err:
 err:
-	return ERR_PTR(-EINVAL);
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return ERR_PTR(error);
 }
 }
 
 
 static int set_msg_len(u8 *block, unsigned int msglen, int csize)
 static int set_msg_len(u8 *block, unsigned int msglen, int csize)
@@ -1724,14 +2295,17 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 	struct phys_sge_parm sg_param;
 	struct phys_sge_parm sg_param;
 	struct scatterlist *src;
 	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE;
 	unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE;
-	unsigned int dst_size = 0, kctx_len;
+	unsigned int dst_size = 0, kctx_len, nents;
 	unsigned int sub_type;
 	unsigned int sub_type;
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	unsigned int authsize = crypto_aead_authsize(tfm);
-	int err = -EINVAL, src_nent;
+	int error = -EINVAL, src_nent;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 
-
+	dst_size = req->assoclen + req->cryptlen + (op_type ? -authsize :
+						   authsize);
+	reqctx->newdstsg = NULL;
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 		goto err;
 		goto err;
 	src_nent = sg_nents_for_len(req->src, req->assoclen + req->cryptlen);
 	src_nent = sg_nents_for_len(req->src, req->assoclen + req->cryptlen);
@@ -1740,26 +2314,35 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 
 
 	sub_type = get_aead_subtype(tfm);
 	sub_type = get_aead_subtype(tfm);
 	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
 	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
-	reqctx->dst = src;
-
 	if (req->src != req->dst) {
 	if (req->src != req->dst) {
-		err = chcr_copy_assoc(req, aeadctx);
-		if (err) {
+		error = chcr_copy_assoc(req, aeadctx);
+		if (error) {
 			pr_err("AAD copy to destination buffer fails\n");
 			pr_err("AAD copy to destination buffer fails\n");
-			return ERR_PTR(err);
+			return ERR_PTR(error);
 		}
 		}
-		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
-					       req->assoclen);
+	}
+	if (dst_size && is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return ERR_CAST(reqctx->newdstsg);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+					       reqctx->newdstsg, req->assoclen);
+	} else {
+		if (req->src == req->dst)
+			reqctx->dst = src;
+		else
+			reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+						       req->dst, req->assoclen);
 	}
 	}
 	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents < 0) {
 	if (reqctx->dst_nents < 0) {
 		pr_err("CCM:Invalid Destination sg entries\n");
 		pr_err("CCM:Invalid Destination sg entries\n");
+		error = -EINVAL;
 		goto err;
 		goto err;
 	}
 	}
-
-
-	if (aead_ccm_validate_input(op_type, req, aeadctx, sub_type))
+	error = aead_ccm_validate_input(op_type, req, aeadctx, sub_type);
+	if (error)
 		goto err;
 		goto err;
 
 
 	dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
 	dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
@@ -1769,13 +2352,18 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 			    T6_MAX_AAD_SIZE - 18,
 			    T6_MAX_AAD_SIZE - 18,
 			    transhdr_len + (sgl_len(src_nent + MIN_CCM_SG) * 8),
 			    transhdr_len + (sgl_len(src_nent + MIN_CCM_SG) * 8),
 			    op_type)) {
 			    op_type)) {
+		atomic_inc(&adap->chcr_stats.fallback);
+		free_new_sg(reqctx->newdstsg);
+		reqctx->newdstsg = NULL;
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 	}
 	}
 
 
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),  flags);
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),  flags);
 
 
-	if (!skb)
+	if (!skb) {
+		error = -ENOMEM;
 		goto err;
 		goto err;
+	}
 
 
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 
 
@@ -1790,29 +2378,32 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 					16), aeadctx->key, aeadctx->enckey_len);
 					16), aeadctx->key, aeadctx->enckey_len);
 
 
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
-	if (ccm_format_packet(req, aeadctx, sub_type, op_type))
+	error = ccm_format_packet(req, aeadctx, sub_type, op_type);
+	if (error)
 		goto dstmap_fail;
 		goto dstmap_fail;
 
 
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
 	sg_param.qid = qid;
-	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
-				  &sg_param))
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+				 reqctx->dst, &sg_param);
+	if (error)
 		goto dstmap_fail;
 		goto dstmap_fail;
 
 
 	skb_set_transport_header(skb, transhdr_len);
 	skb_set_transport_header(skb, transhdr_len);
 	frags = fill_aead_req_fields(skb, req, src, ivsize, aeadctx);
 	frags = fill_aead_req_fields(skb, req, src, ivsize, aeadctx);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1,
-		    sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+	atomic_inc(&adap->chcr_stats.aead_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len, 0, 1,
+		    sizeof(struct cpl_rx_phys_dsgl) + dst_size, 0);
 	reqctx->skb = skb;
 	reqctx->skb = skb;
 	skb_get(skb);
 	skb_get(skb);
 	return skb;
 	return skb;
 dstmap_fail:
 dstmap_fail:
 	kfree_skb(skb);
 	kfree_skb(skb);
-	skb = NULL;
 err:
 err:
-	return ERR_PTR(-EINVAL);
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return ERR_PTR(error);
 }
 }
 
 
 static struct sk_buff *create_gcm_wr(struct aead_request *req,
 static struct sk_buff *create_gcm_wr(struct aead_request *req,
@@ -1832,45 +2423,53 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	struct scatterlist *src;
 	struct scatterlist *src;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int frags = 0, transhdr_len;
 	unsigned int ivsize = AES_BLOCK_SIZE;
 	unsigned int ivsize = AES_BLOCK_SIZE;
-	unsigned int dst_size = 0, kctx_len;
+	unsigned int dst_size = 0, kctx_len, nents, assoclen = req->assoclen;
 	unsigned char tag_offset = 0;
 	unsigned char tag_offset = 0;
-	unsigned int crypt_len = 0;
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	unsigned int authsize = crypto_aead_authsize(tfm);
-	int err = -EINVAL, src_nent;
+	int error = -EINVAL, src_nent;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		GFP_ATOMIC;
 		GFP_ATOMIC;
+	struct adapter *adap = padap(ctx->dev);
 
 
+	reqctx->newdstsg = NULL;
+	dst_size = assoclen + req->cryptlen + (op_type ? -authsize :
+						    authsize);
 	/* validate key size */
 	/* validate key size */
 	if (aeadctx->enckey_len == 0)
 	if (aeadctx->enckey_len == 0)
 		goto err;
 		goto err;
 
 
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 	if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
 		goto err;
 		goto err;
-	src_nent = sg_nents_for_len(req->src, req->assoclen + req->cryptlen);
+	src_nent = sg_nents_for_len(req->src, assoclen + req->cryptlen);
 	if (src_nent < 0)
 	if (src_nent < 0)
 		goto err;
 		goto err;
 
 
-	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, req->assoclen);
-	reqctx->dst = src;
+	src = scatterwalk_ffwd(reqctx->srcffwd, req->src, assoclen);
 	if (req->src != req->dst) {
 	if (req->src != req->dst) {
-		err = chcr_copy_assoc(req, aeadctx);
-		if (err)
-			return	ERR_PTR(err);
-		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd, req->dst,
-					       req->assoclen);
+		error = chcr_copy_assoc(req, aeadctx);
+		if (error)
+			return	ERR_PTR(error);
+	}
+
+	if (dst_size && is_newsg(req->dst, &nents)) {
+		reqctx->newdstsg = alloc_new_sg(req->dst, nents);
+		if (IS_ERR(reqctx->newdstsg))
+			return ERR_CAST(reqctx->newdstsg);
+		reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+					       reqctx->newdstsg, assoclen);
+	} else {
+		if (req->src == req->dst)
+			reqctx->dst = src;
+		else
+			reqctx->dst = scatterwalk_ffwd(reqctx->dstffwd,
+						       req->dst, assoclen);
 	}
 	}
 
 
-	if (!req->cryptlen)
-		/* null-payload is not supported in the hardware.
-		 * software is sending block size
-		 */
-		crypt_len = AES_BLOCK_SIZE;
-	else
-		crypt_len = req->cryptlen;
 	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 	reqctx->dst_nents = sg_nents_for_len(reqctx->dst, req->cryptlen +
 					     (op_type ? -authsize : authsize));
 					     (op_type ? -authsize : authsize));
 	if (reqctx->dst_nents < 0) {
 	if (reqctx->dst_nents < 0) {
 		pr_err("GCM:Invalid Destination sg entries\n");
 		pr_err("GCM:Invalid Destination sg entries\n");
+		error = -EINVAL;
 		goto err;
 		goto err;
 	}
 	}
 
 
@@ -1883,11 +2482,16 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 			    T6_MAX_AAD_SIZE,
 			    T6_MAX_AAD_SIZE,
 			    transhdr_len + (sgl_len(src_nent + MIN_GCM_SG) * 8),
 			    transhdr_len + (sgl_len(src_nent + MIN_GCM_SG) * 8),
 			    op_type)) {
 			    op_type)) {
+		atomic_inc(&adap->chcr_stats.fallback);
+		free_new_sg(reqctx->newdstsg);
+		reqctx->newdstsg = NULL;
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 		return ERR_PTR(chcr_aead_fallback(req, op_type));
 	}
 	}
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
 	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
-	if (!skb)
+	if (!skb) {
+		error = -ENOMEM;
 		goto err;
 		goto err;
+	}
 
 
 	/* NIC driver is going to write the sge hdr. */
 	/* NIC driver is going to write the sge hdr. */
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
 	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
@@ -1896,19 +2500,19 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	memset(chcr_req, 0, transhdr_len);
 	memset(chcr_req, 0, transhdr_len);
 
 
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
-		req->assoclen -= 8;
+		assoclen = req->assoclen - 8;
 
 
 	tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
 	tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
 	chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(
 	chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(
 					ctx->dev->rx_channel_id, 2, (ivsize ?
 					ctx->dev->rx_channel_id, 2, (ivsize ?
-					(req->assoclen + 1) : 0));
+					(assoclen + 1) : 0));
 	chcr_req->sec_cpl.pldlen =
 	chcr_req->sec_cpl.pldlen =
-		htonl(req->assoclen + ivsize + req->cryptlen);
+		htonl(assoclen + ivsize + req->cryptlen);
 	chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
 	chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
-					req->assoclen ? 1 : 0, req->assoclen,
-					req->assoclen + ivsize + 1, 0);
+					assoclen ? 1 : 0, assoclen,
+					assoclen + ivsize + 1, 0);
 		chcr_req->sec_cpl.cipherstop_lo_authinsert =
 		chcr_req->sec_cpl.cipherstop_lo_authinsert =
-			FILL_SEC_CPL_AUTHINSERT(0, req->assoclen + ivsize + 1,
+			FILL_SEC_CPL_AUTHINSERT(0, assoclen + ivsize + 1,
 						tag_offset, tag_offset);
 						tag_offset, tag_offset);
 		chcr_req->sec_cpl.seqno_numivs =
 		chcr_req->sec_cpl.seqno_numivs =
 			FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type ==
 			FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type ==
@@ -1938,19 +2542,19 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.nents = reqctx->dst_nents;
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
 	sg_param.qid = qid;
 	sg_param.qid = qid;
-	sg_param.align = 0;
-	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, reqctx->dst,
-				  &sg_param))
+	error = map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl,
+					  reqctx->dst, &sg_param);
+	if (error)
 		goto dstmap_fail;
 		goto dstmap_fail;
 
 
 	skb_set_transport_header(skb, transhdr_len);
 	skb_set_transport_header(skb, transhdr_len);
-
-	write_sg_to_skb(skb, &frags, req->src, req->assoclen);
-
+	write_sg_to_skb(skb, &frags, req->src, assoclen);
 	write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
 	write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
 	write_sg_to_skb(skb, &frags, src, req->cryptlen);
 	write_sg_to_skb(skb, &frags, src, req->cryptlen);
-	create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
-			sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+	atomic_inc(&adap->chcr_stats.aead_rqst);
+	create_wreq(ctx, chcr_req, &req->base, skb, kctx_len, size, 1,
+			sizeof(struct cpl_rx_phys_dsgl) + dst_size,
+			reqctx->verify);
 	reqctx->skb = skb;
 	reqctx->skb = skb;
 	skb_get(skb);
 	skb_get(skb);
 	return skb;
 	return skb;
@@ -1958,9 +2562,10 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 dstmap_fail:
 dstmap_fail:
 	/* ivmap_fail: */
 	/* ivmap_fail: */
 	kfree_skb(skb);
 	kfree_skb(skb);
-	skb = NULL;
 err:
 err:
-	return skb;
+	free_new_sg(reqctx->newdstsg);
+	reqctx->newdstsg = NULL;
+	return ERR_PTR(error);
 }
 }
 
 
 
 
@@ -1972,7 +2577,8 @@ static int chcr_aead_cra_init(struct crypto_aead *tfm)
 	struct aead_alg *alg = crypto_aead_alg(tfm);
 	struct aead_alg *alg = crypto_aead_alg(tfm);
 
 
 	aeadctx->sw_cipher = crypto_alloc_aead(alg->base.cra_name, 0,
 	aeadctx->sw_cipher = crypto_alloc_aead(alg->base.cra_name, 0,
-					       CRYPTO_ALG_NEED_FALLBACK);
+					       CRYPTO_ALG_NEED_FALLBACK |
+					       CRYPTO_ALG_ASYNC);
 	if  (IS_ERR(aeadctx->sw_cipher))
 	if  (IS_ERR(aeadctx->sw_cipher))
 		return PTR_ERR(aeadctx->sw_cipher);
 		return PTR_ERR(aeadctx->sw_cipher);
 	crypto_aead_set_reqsize(tfm, max(sizeof(struct chcr_aead_reqctx),
 	crypto_aead_set_reqsize(tfm, max(sizeof(struct chcr_aead_reqctx),
@@ -2206,7 +2812,8 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
 				    unsigned int keylen)
 				    unsigned int keylen)
 {
 {
 	struct chcr_context *ctx = crypto_aead_ctx(aead);
 	struct chcr_context *ctx = crypto_aead_ctx(aead);
-	 struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+	struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+	int error;
 
 
 	if (keylen < 3) {
 	if (keylen < 3) {
 		crypto_tfm_set_flags((struct crypto_tfm *)aead,
 		crypto_tfm_set_flags((struct crypto_tfm *)aead,
@@ -2214,6 +2821,15 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
 		aeadctx->enckey_len = 0;
 		aeadctx->enckey_len = 0;
 		return	-EINVAL;
 		return	-EINVAL;
 	}
 	}
+	crypto_aead_clear_flags(aeadctx->sw_cipher, CRYPTO_TFM_REQ_MASK);
+	crypto_aead_set_flags(aeadctx->sw_cipher, crypto_aead_get_flags(aead) &
+			      CRYPTO_TFM_REQ_MASK);
+	error = crypto_aead_setkey(aeadctx->sw_cipher, key, keylen);
+	crypto_aead_clear_flags(aead, CRYPTO_TFM_RES_MASK);
+	crypto_aead_set_flags(aead, crypto_aead_get_flags(aeadctx->sw_cipher) &
+			      CRYPTO_TFM_RES_MASK);
+	if (error)
+		return error;
 	keylen -= 3;
 	keylen -= 3;
 	memcpy(aeadctx->salt, key + keylen, 3);
 	memcpy(aeadctx->salt, key + keylen, 3);
 	return chcr_ccm_common_setkey(aead, key, keylen);
 	return chcr_ccm_common_setkey(aead, key, keylen);
@@ -2552,22 +3168,14 @@ static int chcr_aead_op(struct aead_request *req,
 static struct chcr_alg_template driver_algs[] = {
 static struct chcr_alg_template driver_algs[] = {
 	/* AES-CBC */
 	/* AES-CBC */
 	{
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_CBC,
 		.is_registered = 0,
 		.is_registered = 0,
 		.alg.crypto = {
 		.alg.crypto = {
 			.cra_name		= "cbc(aes)",
 			.cra_name		= "cbc(aes)",
 			.cra_driver_name	= "cbc-aes-chcr",
 			.cra_driver_name	= "cbc-aes-chcr",
-			.cra_priority		= CHCR_CRA_PRIORITY,
-			.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				CRYPTO_ALG_ASYNC,
 			.cra_blocksize		= AES_BLOCK_SIZE,
 			.cra_blocksize		= AES_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct chcr_context)
-				+ sizeof(struct ablk_ctx),
-			.cra_alignmask		= 0,
-			.cra_type		= &crypto_ablkcipher_type,
-			.cra_module		= THIS_MODULE,
 			.cra_init		= chcr_cra_init,
 			.cra_init		= chcr_cra_init,
-			.cra_exit		= NULL,
+			.cra_exit		= chcr_cra_exit,
 			.cra_u.ablkcipher	= {
 			.cra_u.ablkcipher	= {
 				.min_keysize	= AES_MIN_KEY_SIZE,
 				.min_keysize	= AES_MIN_KEY_SIZE,
 				.max_keysize	= AES_MAX_KEY_SIZE,
 				.max_keysize	= AES_MAX_KEY_SIZE,
@@ -2579,24 +3187,15 @@ static struct chcr_alg_template driver_algs[] = {
 		}
 		}
 	},
 	},
 	{
 	{
-		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_XTS,
 		.is_registered = 0,
 		.is_registered = 0,
 		.alg.crypto =   {
 		.alg.crypto =   {
 			.cra_name		= "xts(aes)",
 			.cra_name		= "xts(aes)",
 			.cra_driver_name	= "xts-aes-chcr",
 			.cra_driver_name	= "xts-aes-chcr",
-			.cra_priority		= CHCR_CRA_PRIORITY,
-			.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				CRYPTO_ALG_ASYNC,
 			.cra_blocksize		= AES_BLOCK_SIZE,
 			.cra_blocksize		= AES_BLOCK_SIZE,
-			.cra_ctxsize		= sizeof(struct chcr_context) +
-				sizeof(struct ablk_ctx),
-			.cra_alignmask		= 0,
-			.cra_type		= &crypto_ablkcipher_type,
-			.cra_module		= THIS_MODULE,
 			.cra_init		= chcr_cra_init,
 			.cra_init		= chcr_cra_init,
 			.cra_exit		= NULL,
 			.cra_exit		= NULL,
-			.cra_u = {
-				.ablkcipher = {
+			.cra_u .ablkcipher = {
 					.min_keysize	= 2 * AES_MIN_KEY_SIZE,
 					.min_keysize	= 2 * AES_MIN_KEY_SIZE,
 					.max_keysize	= 2 * AES_MAX_KEY_SIZE,
 					.max_keysize	= 2 * AES_MAX_KEY_SIZE,
 					.ivsize		= AES_BLOCK_SIZE,
 					.ivsize		= AES_BLOCK_SIZE,
@@ -2605,6 +3204,47 @@ static struct chcr_alg_template driver_algs[] = {
 					.decrypt	= chcr_aes_decrypt,
 					.decrypt	= chcr_aes_decrypt,
 				}
 				}
 			}
 			}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_SUB_TYPE_CTR,
+		.is_registered = 0,
+		.alg.crypto = {
+			.cra_name		= "ctr(aes)",
+			.cra_driver_name	= "ctr-aes-chcr",
+			.cra_blocksize		= 1,
+			.cra_init		= chcr_cra_init,
+			.cra_exit		= chcr_cra_exit,
+			.cra_u.ablkcipher	= {
+				.min_keysize	= AES_MIN_KEY_SIZE,
+				.max_keysize	= AES_MAX_KEY_SIZE,
+				.ivsize		= AES_BLOCK_SIZE,
+				.setkey		= chcr_aes_ctr_setkey,
+				.encrypt	= chcr_aes_encrypt,
+				.decrypt	= chcr_aes_decrypt,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER |
+			CRYPTO_ALG_SUB_TYPE_CTR_RFC3686,
+		.is_registered = 0,
+		.alg.crypto = {
+			.cra_name		= "rfc3686(ctr(aes))",
+			.cra_driver_name	= "rfc3686-ctr-aes-chcr",
+			.cra_blocksize		= 1,
+			.cra_init		= chcr_rfc3686_init,
+			.cra_exit		= chcr_cra_exit,
+			.cra_u.ablkcipher	= {
+				.min_keysize	= AES_MIN_KEY_SIZE +
+					CTR_RFC3686_NONCE_SIZE,
+				.max_keysize	= AES_MAX_KEY_SIZE +
+					CTR_RFC3686_NONCE_SIZE,
+				.ivsize		= CTR_RFC3686_IV_SIZE,
+				.setkey		= chcr_aes_rfc3686_setkey,
+				.encrypt	= chcr_aes_encrypt,
+				.decrypt	= chcr_aes_decrypt,
+				.geniv          = "seqiv",
+			}
 		}
 		}
 	},
 	},
 	/* SHA */
 	/* SHA */
@@ -2986,6 +3626,18 @@ static int chcr_register_alg(void)
 			continue;
 			continue;
 		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
 		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
 		case CRYPTO_ALG_TYPE_ABLKCIPHER:
 		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+			driver_algs[i].alg.crypto.cra_priority =
+				CHCR_CRA_PRIORITY;
+			driver_algs[i].alg.crypto.cra_module = THIS_MODULE;
+			driver_algs[i].alg.crypto.cra_flags =
+				CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_NEED_FALLBACK;
+			driver_algs[i].alg.crypto.cra_ctxsize =
+				sizeof(struct chcr_context) +
+				sizeof(struct ablk_ctx);
+			driver_algs[i].alg.crypto.cra_alignmask = 0;
+			driver_algs[i].alg.crypto.cra_type =
+				&crypto_ablkcipher_type;
 			err = crypto_register_alg(&driver_algs[i].alg.crypto);
 			err = crypto_register_alg(&driver_algs[i].alg.crypto);
 			name = driver_algs[i].alg.crypto.cra_driver_name;
 			name = driver_algs[i].alg.crypto.cra_driver_name;
 			break;
 			break;

+ 27 - 3
drivers/crypto/chelsio/chcr_algo.h

@@ -185,11 +185,11 @@
 			FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \
 			FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \
 			FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len)))
 			FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len)))
 
 
-#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, fid) \
+#define FILL_WR_RX_Q_ID(cid, qid, wr_iv, lcb, fid) \
 		htonl( \
 		htonl( \
 			FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \
 			FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \
 			FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \
 			FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \
-			FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \
+			FW_CRYPTO_LOOKASIDE_WR_LCB_V((lcb)) | \
 			FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)) | \
 			FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)) | \
 			FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(fid))
 			FW_CRYPTO_LOOKASIDE_WR_FQIDX_V(fid))
 
 
@@ -219,9 +219,26 @@
 #define MAX_NK 8
 #define MAX_NK 8
 #define CRYPTO_MAX_IMM_TX_PKT_LEN 256
 #define CRYPTO_MAX_IMM_TX_PKT_LEN 256
 #define MAX_WR_SIZE			512
 #define MAX_WR_SIZE			512
+#define ROUND_16(bytes)		((bytes) & 0xFFFFFFF0)
+#define MAX_DSGL_ENT			32
+#define MAX_DIGEST_SKB_SGE	(MAX_SKB_FRAGS - 2)
+#define MIN_CIPHER_SG			1 /* IV */
 #define MIN_AUTH_SG			2 /*IV + AAD*/
 #define MIN_AUTH_SG			2 /*IV + AAD*/
 #define MIN_GCM_SG			2 /* IV + AAD*/
 #define MIN_GCM_SG			2 /* IV + AAD*/
+#define MIN_DIGEST_SG			1 /*Partial Buffer*/
 #define MIN_CCM_SG			3 /*IV+AAD+B0*/
 #define MIN_CCM_SG			3 /*IV+AAD+B0*/
+#define SPACE_LEFT(len) \
+	((MAX_WR_SIZE - WR_MIN_LEN - (len)))
+
+unsigned int sgl_ent_len[] = {0, 0, 16, 24, 40,
+				48, 64, 72, 88,
+				96, 112, 120, 136,
+				144, 160, 168, 184,
+				192};
+unsigned int dsgl_ent_len[] = {0, 32, 32, 48, 48, 64, 64, 80, 80,
+				112, 112, 128, 128, 144, 144, 160, 160,
+				192, 192, 208, 208, 224, 224, 240, 240,
+				272, 272, 288, 288, 304, 304, 320, 320};
 
 
 struct algo_param {
 struct algo_param {
 	unsigned int auth_mode;
 	unsigned int auth_mode;
@@ -239,6 +256,14 @@ struct hash_wr_param {
 	u64 scmd1;
 	u64 scmd1;
 };
 };
 
 
+struct cipher_wr_param {
+	struct ablkcipher_request *req;
+	struct scatterlist *srcsg;
+	char *iv;
+	int bytes;
+	short int snent;
+	unsigned short qid;
+};
 enum {
 enum {
 	AES_KEYLENGTH_128BIT = 128,
 	AES_KEYLENGTH_128BIT = 128,
 	AES_KEYLENGTH_192BIT = 192,
 	AES_KEYLENGTH_192BIT = 192,
@@ -293,7 +318,6 @@ struct phys_sge_parm {
 	unsigned int nents;
 	unsigned int nents;
 	unsigned int obsize;
 	unsigned int obsize;
 	unsigned short qid;
 	unsigned short qid;
-	unsigned char align;
 };
 };
 
 
 struct crypto_result {
 struct crypto_result {

+ 39 - 17
drivers/crypto/chelsio/chcr_core.c

@@ -29,6 +29,7 @@
 static LIST_HEAD(uld_ctx_list);
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 static DEFINE_MUTEX(dev_mutex);
 static atomic_t dev_count;
 static atomic_t dev_count;
+static struct uld_ctx *ctx_rr;
 
 
 typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input);
 typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input);
 static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input);
 static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input);
@@ -49,25 +50,28 @@ static struct cxgb4_uld_info chcr_uld_info = {
 	.rx_handler = chcr_uld_rx_handler,
 	.rx_handler = chcr_uld_rx_handler,
 };
 };
 
 
-int assign_chcr_device(struct chcr_dev **dev)
+struct uld_ctx *assign_chcr_device(void)
 {
 {
-	struct uld_ctx *u_ctx;
-	int ret = -ENXIO;
+	struct uld_ctx *u_ctx = NULL;
 
 
 	/*
 	/*
-	 * Which device to use if multiple devices are available TODO
-	 * May be select the device based on round robin. One session
-	 * must go to the same device to maintain the ordering.
+	 * When multiple devices are present in the system, select a
+	 * device in round-robin fashion for crypto operations. A given
+	 * session must still use the same device throughout to maintain
+	 * request-response ordering.
 	 */
 	 */
-	mutex_lock(&dev_mutex); /* TODO ? */
-	list_for_each_entry(u_ctx, &uld_ctx_list, entry)
-		if (u_ctx->dev) {
-			*dev = u_ctx->dev;
-			ret = 0;
-			break;
+	mutex_lock(&dev_mutex);
+	if (!list_empty(&uld_ctx_list)) {
+		u_ctx = ctx_rr;
+		if (list_is_last(&ctx_rr->entry, &uld_ctx_list))
+			ctx_rr = list_first_entry(&uld_ctx_list,
+						  struct uld_ctx,
+						  entry);
+		else
+			ctx_rr = list_next_entry(ctx_rr, entry);
 	}
 	}
 	mutex_unlock(&dev_mutex);
 	mutex_unlock(&dev_mutex);
-	return ret;
+	return u_ctx;
 }
 }
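The list walk in assign_chcr_device(), and the matching advance in chcr_dev_remove() below, is a plain round-robin cursor; over an array it would look like the sketch below (types and names are illustrative, and the real code walks a list_head under dev_mutex):

#include <stddef.h>

struct fake_dev { int id; };

static struct fake_dev *assign_round_robin(struct fake_dev *devs, size_t count,
					   size_t *cursor)
{
	struct fake_dev *picked;

	if (count == 0)
		return NULL;			/* no adapters registered yet */
	picked = &devs[*cursor];
	/* list_is_last() ? first entry : next entry */
	*cursor = (*cursor + 1) % count;
	return picked;
}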
 
 
 static int chcr_dev_add(struct uld_ctx *u_ctx)
 static int chcr_dev_add(struct uld_ctx *u_ctx)
@@ -82,11 +86,27 @@ static int chcr_dev_add(struct uld_ctx *u_ctx)
 	u_ctx->dev = dev;
 	u_ctx->dev = dev;
 	dev->u_ctx = u_ctx;
 	dev->u_ctx = u_ctx;
 	atomic_inc(&dev_count);
 	atomic_inc(&dev_count);
+	mutex_lock(&dev_mutex);
+	list_add_tail(&u_ctx->entry, &uld_ctx_list);
+	if (!ctx_rr)
+		ctx_rr = u_ctx;
+	mutex_unlock(&dev_mutex);
 	return 0;
 	return 0;
 }
 }
 
 
 static int chcr_dev_remove(struct uld_ctx *u_ctx)
 static int chcr_dev_remove(struct uld_ctx *u_ctx)
 {
 {
+	if (ctx_rr == u_ctx) {
+		if (list_is_last(&ctx_rr->entry, &uld_ctx_list))
+			ctx_rr = list_first_entry(&uld_ctx_list,
+						  struct uld_ctx,
+						  entry);
+		else
+			ctx_rr = list_next_entry(ctx_rr, entry);
+	}
+	list_del(&u_ctx->entry);
+	if (list_empty(&uld_ctx_list))
+		ctx_rr = NULL;
 	kfree(u_ctx->dev);
 	kfree(u_ctx->dev);
 	u_ctx->dev = NULL;
 	u_ctx->dev = NULL;
 	atomic_dec(&dev_count);
 	atomic_dec(&dev_count);
@@ -100,6 +120,7 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev,
 	struct cpl_fw6_pld *fw6_pld;
 	struct cpl_fw6_pld *fw6_pld;
 	u32 ack_err_status = 0;
 	u32 ack_err_status = 0;
 	int error_status = 0;
 	int error_status = 0;
+	struct adapter *adap = padap(dev);
 
 
 	fw6_pld = (struct cpl_fw6_pld *)input;
 	fw6_pld = (struct cpl_fw6_pld *)input;
 	req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu(
 	req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu(
@@ -111,11 +132,11 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev,
 		if (CHK_MAC_ERR_BIT(ack_err_status) ||
 		if (CHK_MAC_ERR_BIT(ack_err_status) ||
 		    CHK_PAD_ERR_BIT(ack_err_status))
 		    CHK_PAD_ERR_BIT(ack_err_status))
 			error_status = -EBADMSG;
 			error_status = -EBADMSG;
+		atomic_inc(&adap->chcr_stats.error);
 	}
 	}
 	/* call completion callback with failure status */
 	/* call completion callback with failure status */
 	if (req) {
 	if (req) {
 		error_status = chcr_handle_resp(req, input, error_status);
 		error_status = chcr_handle_resp(req, input, error_status);
-		req->complete(req, error_status);
 	} else {
 	} else {
 		pr_err("Incorrect request address from the firmware\n");
 		pr_err("Incorrect request address from the firmware\n");
 		return -EFAULT;
 		return -EFAULT;
@@ -138,10 +159,11 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
 		u_ctx = ERR_PTR(-ENOMEM);
 		u_ctx = ERR_PTR(-ENOMEM);
 		goto out;
 		goto out;
 	}
 	}
+	if (!(lld->ulp_crypto & ULP_CRYPTO_LOOKASIDE)) {
+		u_ctx = ERR_PTR(-ENOMEM);
+		goto out;
+	}
 	u_ctx->lldi = *lld;
 	u_ctx->lldi = *lld;
-	mutex_lock(&dev_mutex);
-	list_add_tail(&u_ctx->entry, &uld_ctx_list);
-	mutex_unlock(&dev_mutex);
 out:
 out:
 	return u_ctx;
 	return u_ctx;
 }
 }

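The chcr_core.c hunks above replace the old "first device that has a dev pointer" lookup with a round-robin cursor (ctx_rr) that is advanced under dev_mutex and wraps back to the head of uld_ctx_list, while chcr_dev_add()/chcr_dev_remove() now maintain both the list and the cursor. The snippet below is a minimal, self-contained userspace model of that cursor walk only; the names model_dev and model_assign are hypothetical, and a plain next pointer plus a pthread mutex stand in for the kernel list_head and dev_mutex.

/* Userspace model of the round-robin cursor in assign_chcr_device(). */
#include <stdio.h>
#include <pthread.h>

struct model_dev {
	int id;
	struct model_dev *next;		/* NULL marks the end of the list */
};

static struct model_dev *head;		/* first registered device */
static struct model_dev *ctx_rr;	/* round-robin cursor */
static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Return the current cursor and advance it, wrapping at the end of the list. */
static struct model_dev *model_assign(void)
{
	struct model_dev *d = NULL;

	pthread_mutex_lock(&dev_mutex);
	if (ctx_rr) {
		d = ctx_rr;
		ctx_rr = ctx_rr->next ? ctx_rr->next : head;
	}
	pthread_mutex_unlock(&dev_mutex);
	return d;
}

int main(void)
{
	struct model_dev a = { .id = 0 }, b = { .id = 1 }, c = { .id = 2 };

	a.next = &b;
	b.next = &c;
	head = &a;
	ctx_rr = head;

	for (int i = 0; i < 5; i++)
		printf("request %d -> device %d\n", i, model_assign()->id);
	return 0;
}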
+ 4 - 1
drivers/crypto/chelsio/chcr_core.h

@@ -53,6 +53,9 @@
 #define MAC_ERROR_BIT		0
 #define CHK_MAC_ERR_BIT(x)	(((x) >> MAC_ERROR_BIT) & 1)
 #define MAX_SALT                4
+#define WR_MIN_LEN (sizeof(struct chcr_wr) + \
+		    sizeof(struct cpl_rx_phys_dsgl) + \
+		    sizeof(struct ulptx_sgl))

 #define padap(dev) pci_get_drvdata(dev->u_ctx->lldi.pdev)

@@ -86,7 +89,7 @@ struct uld_ctx {
 	struct chcr_dev *dev;
 };

-int assign_chcr_device(struct chcr_dev **dev);
+struct uld_ctx * assign_chcr_device(void);
 int chcr_send_wr(struct sk_buff *skb);
 int start_crypto(void);
 int stop_crypto(void);

+ 22 - 3
drivers/crypto/chelsio/chcr_crypto.h

@@ -139,6 +139,9 @@
 #define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309    0x06000000
 #define CRYPTO_ALG_SUB_TYPE_AEAD_NULL       0x07000000
 #define CRYPTO_ALG_SUB_TYPE_CTR             0x08000000
+#define CRYPTO_ALG_SUB_TYPE_CTR_RFC3686     0x09000000
+#define CRYPTO_ALG_SUB_TYPE_XTS		    0x0a000000
+#define CRYPTO_ALG_SUB_TYPE_CBC		    0x0b000000
 #define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\
 			      CRYPTO_ALG_SUB_TYPE_HASH_HMAC)

@@ -146,19 +149,23 @@

 #define CHCR_HASH_MAX_BLOCK_SIZE_64  64
 #define CHCR_HASH_MAX_BLOCK_SIZE_128 128
+#define CHCR_SG_SIZE 2048

 /* Aligned to 128 bit boundary */

 struct ablk_ctx {
+	struct crypto_skcipher *sw_cipher;
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
-	u8 key[CHCR_AES_MAX_KEY_LEN];
 	unsigned char ciph_mode;
+	u8 key[CHCR_AES_MAX_KEY_LEN];
+	u8 nonce[4];
 	u8 rrkey[AES_MAX_KEY_SIZE];
 };
 struct chcr_aead_reqctx {
 	struct	sk_buff	*skb;
 	struct scatterlist *dst;
+	struct scatterlist *newdstsg;
 	struct scatterlist srcffwd[2];
 	struct scatterlist dstffwd[2];
 	short int dst_nents;
@@ -233,7 +240,14 @@ struct chcr_ahash_req_ctx {

 struct chcr_blkcipher_req_ctx {
 	struct sk_buff *skb;
-	unsigned int dst_nents;
+	struct scatterlist srcffwd[2];
+	struct scatterlist dstffwd[2];
+	struct scatterlist *dstsg;
+	struct scatterlist *dst;
+	struct scatterlist *newdstsg;
+	unsigned int processed;
+	unsigned int op;
+	short int dst_nents;
 	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
 };

@@ -275,5 +289,10 @@ static int chcr_aead_op(struct aead_request *req_base,
 			  int size,
 			  create_wr_t create_wr_fn);
 static inline int get_aead_subtype(struct crypto_aead *aead);
-
+static int is_newsg(struct scatterlist *sgl, unsigned int *newents);
+static struct scatterlist *alloc_new_sg(struct scatterlist *sgl,
+					unsigned int nents);
+static inline void free_new_sg(struct scatterlist *sgl);
+static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+				   unsigned char *input, int err);
 #endif /* __CHCR_CRYPTO_H__ */

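The chcr_crypto.h hunk adds three more sub-type tags (CTR_RFC3686, XTS, CBC) that, like the existing ones, live in the upper byte of the 32-bit algorithm flag word. The fragment below only illustrates how such a packed sub-type can be ORed into a flags word and recovered with a mask; SUB_TYPE_MASK and get_sub_type() are hypothetical helpers for this sketch, not identifiers from the driver.

#include <stdio.h>
#include <stdint.h>

#define SUB_TYPE_CTR_RFC3686	0x09000000u
#define SUB_TYPE_XTS		0x0a000000u
#define SUB_TYPE_CBC		0x0b000000u
#define SUB_TYPE_MASK		0xff000000u	/* upper byte carries the sub-type */

static uint32_t get_sub_type(uint32_t flags)
{
	return flags & SUB_TYPE_MASK;
}

int main(void)
{
	uint32_t flags = 0x00000400u | SUB_TYPE_XTS;	/* arbitrary base flags + sub-type */

	printf("XTS request: %s\n",
	       get_sub_type(flags) == SUB_TYPE_XTS ? "yes" : "no");
	return 0;
}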
+ 10 - 2
drivers/crypto/img-hash.c

@@ -1088,9 +1088,17 @@ static int img_hash_suspend(struct device *dev)
 static int img_hash_resume(struct device *dev)
 {
 	struct img_hash_dev *hdev = dev_get_drvdata(dev);
+	int ret;

-	clk_prepare_enable(hdev->hash_clk);
-	clk_prepare_enable(hdev->sys_clk);
+	ret = clk_prepare_enable(hdev->hash_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(hdev->sys_clk);
+	if (ret) {
+		clk_disable_unprepare(hdev->hash_clk);
+		return ret;
+	}

 	return 0;
 }

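The img-hash.c fix above makes img_hash_resume() check the return value of each clk_prepare_enable() call and disable the first clock again if the second one fails. A compact sketch of that enable-then-roll-back pattern, with stub functions standing in for the clock API and a deliberately injected failure, is shown below.

#include <stdio.h>

static int enable_hash_clk(void) { puts("hash clk enabled"); return 0; }
static int enable_sys_clk(void)  { puts("sys clk failed");   return -1; }	/* injected failure */
static void disable_hash_clk(void) { puts("hash clk rolled back"); }

static int resume(void)
{
	int ret = enable_hash_clk();

	if (ret)
		return ret;

	ret = enable_sys_clk();
	if (ret) {
		disable_hash_clk();	/* undo the first step before reporting the error */
		return ret;
	}
	return 0;
}

int main(void)
{
	printf("resume() = %d\n", resume());
	return 0;
}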
+ 2 - 0
drivers/crypto/inside-secure/Makefile

@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += crypto_safexcel.o
+crypto_safexcel-objs := safexcel.o safexcel_ring.o safexcel_cipher.o safexcel_hash.o

+ 926 - 0
drivers/crypto/inside-secure/safexcel.c

@@ -0,0 +1,926 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+
+#include "safexcel.h"
+
+static u32 max_rings = EIP197_MAX_RINGS;
+module_param(max_rings, uint, 0644);
+MODULE_PARM_DESC(max_rings, "Maximum number of rings to use.");
+
+static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
+{
+	u32 val, htable_offset;
+	int i;
+
+	/* Enable the record cache memory access */
+	val = readl(priv->base + EIP197_CS_RAM_CTRL);
+	val &= ~EIP197_TRC_ENABLE_MASK;
+	val |= EIP197_TRC_ENABLE_0;
+	writel(val, priv->base + EIP197_CS_RAM_CTRL);
+
+	/* Clear all ECC errors */
+	writel(0, priv->base + EIP197_TRC_ECCCTRL);
+
+	/*
+	 * Make sure the cache memory is accessible by taking record cache into
+	 * reset.
+	 */
+	val = readl(priv->base + EIP197_TRC_PARAMS);
+	val |= EIP197_TRC_PARAMS_SW_RESET;
+	val &= ~EIP197_TRC_PARAMS_DATA_ACCESS;
+	writel(val, priv->base + EIP197_TRC_PARAMS);
+
+	/* Clear all records */
+	for (i = 0; i < EIP197_CS_RC_MAX; i++) {
+		u32 val, offset = EIP197_CLASSIFICATION_RAMS + i * EIP197_CS_RC_SIZE;
+
+		writel(EIP197_CS_RC_NEXT(EIP197_RC_NULL) |
+		       EIP197_CS_RC_PREV(EIP197_RC_NULL),
+		       priv->base + offset);
+
+		val = EIP197_CS_RC_NEXT(i+1) | EIP197_CS_RC_PREV(i-1);
+		if (i == 0)
+			val |= EIP197_CS_RC_PREV(EIP197_RC_NULL);
+		else if (i == EIP197_CS_RC_MAX - 1)
+			val |= EIP197_CS_RC_NEXT(EIP197_RC_NULL);
+		writel(val, priv->base + offset + sizeof(u32));
+	}
+
+	/* Clear the hash table entries */
+	htable_offset = EIP197_CS_RC_MAX * EIP197_CS_RC_SIZE;
+	for (i = 0; i < 64; i++)
+		writel(GENMASK(29, 0),
+		       priv->base + EIP197_CLASSIFICATION_RAMS + htable_offset + i * sizeof(u32));
+
+	/* Disable the record cache memory access */
+	val = readl(priv->base + EIP197_CS_RAM_CTRL);
+	val &= ~EIP197_TRC_ENABLE_MASK;
+	writel(val, priv->base + EIP197_CS_RAM_CTRL);
+
+	/* Write head and tail pointers of the record free chain */
+	val = EIP197_TRC_FREECHAIN_HEAD_PTR(0) |
+	      EIP197_TRC_FREECHAIN_TAIL_PTR(EIP197_CS_RC_MAX - 1);
+	writel(val, priv->base + EIP197_TRC_FREECHAIN);
+
+	/* Configure the record cache #1 */
+	val = EIP197_TRC_PARAMS2_RC_SZ_SMALL(EIP197_CS_TRC_REC_WC) |
+	      EIP197_TRC_PARAMS2_HTABLE_PTR(EIP197_CS_RC_MAX);
+	writel(val, priv->base + EIP197_TRC_PARAMS2);
+
+	/* Configure the record cache #2 */
+	val = EIP197_TRC_PARAMS_RC_SZ_LARGE(EIP197_CS_TRC_LG_REC_WC) |
+	      EIP197_TRC_PARAMS_BLK_TIMER_SPEED(1) |
+	      EIP197_TRC_PARAMS_HTABLE_SZ(2);
+	writel(val, priv->base + EIP197_TRC_PARAMS);
+}
+
+static void eip197_write_firmware(struct safexcel_crypto_priv *priv,
+				  const struct firmware *fw, u32 ctrl,
+				  u32 prog_en)
+{
+	const u32 *data = (const u32 *)fw->data;
+	u32 val;
+	int i;
+
+	/* Reset the engine to make its program memory accessible */
+	writel(EIP197_PE_ICE_x_CTRL_SW_RESET |
+	       EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR |
+	       EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR,
+	       priv->base + ctrl);
+
+	/* Enable access to the program memory */
+	writel(prog_en, priv->base + EIP197_PE_ICE_RAM_CTRL);
+
+	/* Write the firmware */
+	for (i = 0; i < fw->size / sizeof(u32); i++)
+		writel(be32_to_cpu(data[i]),
+		       priv->base + EIP197_CLASSIFICATION_RAMS + i * sizeof(u32));
+
+	/* Disable access to the program memory */
+	writel(0, priv->base + EIP197_PE_ICE_RAM_CTRL);
+
+	/* Release engine from reset */
+	val = readl(priv->base + ctrl);
+	val &= ~EIP197_PE_ICE_x_CTRL_SW_RESET;
+	writel(val, priv->base + ctrl);
+}
+
+static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
+{
+	const char *fw_name[] = {"ifpp.bin", "ipue.bin"};
+	const struct firmware *fw[FW_NB];
+	int i, j, ret = 0;
+	u32 val;
+
+	for (i = 0; i < FW_NB; i++) {
+		ret = request_firmware(&fw[i], fw_name[i], priv->dev);
+		if (ret) {
+			dev_err(priv->dev,
+				"Failed to request firmware %s (%d)\n",
+				fw_name[i], ret);
+			goto release_fw;
+		}
+	 }
+
+	/* Clear the scratchpad memory */
+	val = readl(priv->base + EIP197_PE_ICE_SCRATCH_CTRL);
+	val |= EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_TIMER |
+	       EIP197_PE_ICE_SCRATCH_CTRL_TIMER_EN |
+	       EIP197_PE_ICE_SCRATCH_CTRL_SCRATCH_ACCESS |
+	       EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS;
+	writel(val, priv->base + EIP197_PE_ICE_SCRATCH_CTRL);
+
+	memset(priv->base + EIP197_PE_ICE_SCRATCH_RAM, 0,
+	       EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32));
+
+	eip197_write_firmware(priv, fw[FW_IFPP], EIP197_PE_ICE_FPP_CTRL,
+			      EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN);
+
+	eip197_write_firmware(priv, fw[FW_IPUE], EIP197_PE_ICE_PUE_CTRL,
+			      EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN);
+
+release_fw:
+	for (j = 0; j < i; j++)
+		release_firmware(fw[j]);
+
+	return ret;
+}
+
+static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
+{
+	u32 hdw, cd_size_rnd, val;
+	int i;
+
+	hdw = readl(priv->base + EIP197_HIA_OPTIONS);
+	hdw &= GENMASK(27, 25);
+	hdw >>= 25;
+
+	cd_size_rnd = (priv->config.cd_size + (BIT(hdw) - 1)) >> hdw;
+
+	for (i = 0; i < priv->config.rings; i++) {
+		/* ring base address */
+		writel(lower_32_bits(priv->ring[i].cdr.base_dma),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		writel(upper_32_bits(priv->ring[i].cdr.base_dma),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+
+		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.cd_offset << 16) |
+		       priv->config.cd_size,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_DESC_SIZE);
+		writel(((EIP197_FETCH_COUNT * (cd_size_rnd << hdw)) << 16) |
+		       (EIP197_FETCH_COUNT * priv->config.cd_offset),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Configure DMA tx control */
+		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
+		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
+		writel(val,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_DMA_CFG);
+
+		/* clear any pending interrupt */
+		writel(GENMASK(5, 0),
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_STAT);
+	}
+
+	return 0;
+}
+
+static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
+{
+	u32 hdw, rd_size_rnd, val;
+	int i;
+
+	hdw = readl(priv->base + EIP197_HIA_OPTIONS);
+	hdw &= GENMASK(27, 25);
+	hdw >>= 25;
+
+	rd_size_rnd = (priv->config.rd_size + (BIT(hdw) - 1)) >> hdw;
+
+	for (i = 0; i < priv->config.rings; i++) {
+		/* ring base address */
+		writel(lower_32_bits(priv->ring[i].rdr.base_dma),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		writel(upper_32_bits(priv->ring[i].rdr.base_dma),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+
+		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.rd_offset << 16) |
+		       priv->config.rd_size,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_DESC_SIZE);
+
+		writel(((EIP197_FETCH_COUNT * (rd_size_rnd << hdw)) << 16) |
+		       (EIP197_FETCH_COUNT * priv->config.rd_offset),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Configure DMA tx control */
+		val = EIP197_HIA_xDR_CFG_WR_CACHE(WR_CACHE_3BITS);
+		val |= EIP197_HIA_xDR_CFG_RD_CACHE(RD_CACHE_3BITS);
+		val |= EIP197_HIA_xDR_WR_RES_BUF | EIP197_HIA_xDR_WR_CTRL_BUG;
+		writel(val,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_DMA_CFG);
+
+		/* clear any pending interrupt */
+		writel(GENMASK(7, 0),
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_STAT);
+
+		/* enable ring interrupt */
+		val = readl(priv->base + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
+		val |= EIP197_RDR_IRQ(i);
+		writel(val, priv->base + EIP197_HIA_AIC_R_ENABLE_CTRL(i));
+	}
+
+	return 0;
+}
+
+static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
+{
+	u32 version, val;
+	int i, ret;
+
+	/* Determine endianness and configure byte swap */
+	version = readl(priv->base + EIP197_HIA_VERSION);
+	val = readl(priv->base + EIP197_HIA_MST_CTRL);
+
+	if ((version & 0xffff) == EIP197_HIA_VERSION_BE)
+		val |= EIP197_MST_CTRL_BYTE_SWAP;
+	else if (((version >> 16) & 0xffff) == EIP197_HIA_VERSION_LE)
+		val |= (EIP197_MST_CTRL_NO_BYTE_SWAP >> 24);
+
+	writel(val, priv->base + EIP197_HIA_MST_CTRL);
+
+
+	/* Configure wr/rd cache values */
+	writel(EIP197_MST_CTRL_RD_CACHE(RD_CACHE_4BITS) |
+	       EIP197_MST_CTRL_WD_CACHE(WR_CACHE_4BITS),
+	       priv->base + EIP197_MST_CTRL);
+
+	/* Interrupts reset */
+
+	/* Disable all global interrupts */
+	writel(0, priv->base + EIP197_HIA_AIC_G_ENABLE_CTRL);
+
+	/* Clear any pending interrupt */
+	writel(GENMASK(31, 0), priv->base + EIP197_HIA_AIC_G_ACK);
+
+	/* Data Fetch Engine configuration */
+
+	/* Reset all DFE threads */
+	writel(EIP197_DxE_THR_CTRL_RESET_PE,
+	       priv->base + EIP197_HIA_DFE_THR_CTRL);
+
+	/* Reset HIA input interface arbiter */
+	writel(EIP197_HIA_RA_PE_CTRL_RESET,
+	       priv->base + EIP197_HIA_RA_PE_CTRL);
+
+	/* DMA transfer size to use */
+	val = EIP197_HIA_DFE_CFG_DIS_DEBUG;
+	val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(5) | EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(9);
+	val |= EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(5) | EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(7);
+	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(RD_CACHE_3BITS);
+	val |= EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(RD_CACHE_3BITS);
+	writel(val, priv->base + EIP197_HIA_DFE_CFG);
+
+	/* Leave the DFE threads reset state */
+	writel(0, priv->base + EIP197_HIA_DFE_THR_CTRL);
+
+	/* Configure the processing engine thresholds */
+	writel(EIP197_PE_IN_xBUF_THRES_MIN(5) | EIP197_PE_IN_xBUF_THRES_MAX(9),
+	      priv->base + EIP197_PE_IN_DBUF_THRES);
+	writel(EIP197_PE_IN_xBUF_THRES_MIN(5) | EIP197_PE_IN_xBUF_THRES_MAX(7),
+	      priv->base + EIP197_PE_IN_TBUF_THRES);
+
+	/* enable HIA input interface arbiter and rings */
+	writel(EIP197_HIA_RA_PE_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
+	       priv->base + EIP197_HIA_RA_PE_CTRL);
+
+	/* Data Store Engine configuration */
+
+	/* Reset all DSE threads */
+	writel(EIP197_DxE_THR_CTRL_RESET_PE,
+	       priv->base + EIP197_HIA_DSE_THR_CTRL);
+
+	/* Wait for all DSE threads to complete */
+	while ((readl(priv->base + EIP197_HIA_DSE_THR_STAT) &
+		GENMASK(15, 12)) != GENMASK(15, 12))
+		;
+
+	/* DMA transfer size to use */
+	val = EIP197_HIA_DSE_CFG_DIS_DEBUG;
+	val |= EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(7) | EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(8);
+	val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(WR_CACHE_3BITS);
+	val |= EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE;
+	val |= EIP197_HIA_DSE_CFG_EN_SINGLE_WR;
+	writel(val, priv->base + EIP197_HIA_DSE_CFG);
+
+	/* Leave the DSE threads reset state */
+	writel(0, priv->base + EIP197_HIA_DSE_THR_CTRL);
+
+	/* Configure the processing engine thresholds */
+	writel(EIP197_PE_OUT_DBUF_THRES_MIN(7) | EIP197_PE_OUT_DBUF_THRES_MAX(8),
+	       priv->base + EIP197_PE_OUT_DBUF_THRES);
+
+	/* Processing Engine configuration */
+
+	/* H/W capabilities selection */
+	val = EIP197_FUNCTION_RSVD;
+	val |= EIP197_PROTOCOL_ENCRYPT_ONLY | EIP197_PROTOCOL_HASH_ONLY;
+	val |= EIP197_ALG_AES_ECB | EIP197_ALG_AES_CBC;
+	val |= EIP197_ALG_SHA1 | EIP197_ALG_HMAC_SHA1;
+	val |= EIP197_ALG_SHA2;
+	writel(val, priv->base + EIP197_PE_EIP96_FUNCTION_EN);
+
+	/* Command Descriptor Rings prepare */
+	for (i = 0; i < priv->config.rings; i++) {
+		/* Clear interrupts for this ring */
+		writel(GENMASK(31, 0),
+		       priv->base + EIP197_HIA_AIC_R_ENABLE_CLR(i));
+
+		/* Disable external triggering */
+		writel(0, priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Clear the pending prepared counter */
+		writel(EIP197_xDR_PREP_CLR_COUNT,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PREP_COUNT);
+
+		/* Clear the pending processed counter */
+		writel(EIP197_xDR_PROC_CLR_COUNT,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PROC_COUNT);
+
+		writel(0,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PREP_PNTR);
+		writel(0,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_PROC_PNTR);
+
+		writel((EIP197_DEFAULT_RING_SIZE * priv->config.cd_offset) << 2,
+		       priv->base + EIP197_HIA_CDR(i) + EIP197_HIA_xDR_RING_SIZE);
+	}
+
+	/* Result Descriptor Ring prepare */
+	for (i = 0; i < priv->config.rings; i++) {
+		/* Disable external triggering*/
+		writel(0, priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_CFG);
+
+		/* Clear the pending prepared counter */
+		writel(EIP197_xDR_PREP_CLR_COUNT,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PREP_COUNT);
+
+		/* Clear the pending processed counter */
+		writel(EIP197_xDR_PROC_CLR_COUNT,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PROC_COUNT);
+
+		writel(0,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PREP_PNTR);
+		writel(0,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_PROC_PNTR);
+
+		/* Ring size */
+		writel((EIP197_DEFAULT_RING_SIZE * priv->config.rd_offset) << 2,
+		       priv->base + EIP197_HIA_RDR(i) + EIP197_HIA_xDR_RING_SIZE);
+	}
+
+	/* Enable command descriptor rings */
+	writel(EIP197_DxE_THR_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
+	       priv->base + EIP197_HIA_DFE_THR_CTRL);
+
+	/* Enable result descriptor rings */
+	writel(EIP197_DxE_THR_CTRL_EN | GENMASK(priv->config.rings - 1, 0),
+	       priv->base + EIP197_HIA_DSE_THR_CTRL);
+
+	/* Clear any HIA interrupt */
+	writel(GENMASK(30, 20), priv->base + EIP197_HIA_AIC_G_ACK);
+
+	eip197_trc_cache_init(priv);
+
+	ret = eip197_load_firmwares(priv);
+	if (ret)
+		return ret;
+
+	safexcel_hw_setup_cdesc_rings(priv);
+	safexcel_hw_setup_rdesc_rings(priv);
+
+	return 0;
+}
+
+void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring)
+{
+	struct crypto_async_request *req, *backlog;
+	struct safexcel_context *ctx;
+	struct safexcel_request *request;
+	int ret, nreq = 0, cdesc = 0, rdesc = 0, commands, results;
+
+	priv->ring[ring].need_dequeue = false;
+
+	do {
+		spin_lock_bh(&priv->ring[ring].queue_lock);
+		backlog = crypto_get_backlog(&priv->ring[ring].queue);
+		req = crypto_dequeue_request(&priv->ring[ring].queue);
+		spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+		if (!req)
+			goto finalize;
+
+		request = kzalloc(sizeof(*request), EIP197_GFP_FLAGS(*req));
+		if (!request) {
+			spin_lock_bh(&priv->ring[ring].queue_lock);
+			crypto_enqueue_request(&priv->ring[ring].queue, req);
+			spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+			priv->ring[ring].need_dequeue = true;
+			goto finalize;
+		}
+
+		ctx = crypto_tfm_ctx(req->tfm);
+		ret = ctx->send(req, ring, request, &commands, &results);
+		if (ret) {
+			kfree(request);
+			req->complete(req, ret);
+			priv->ring[ring].need_dequeue = true;
+			goto finalize;
+		}
+
+		if (backlog)
+			backlog->complete(backlog, -EINPROGRESS);
+
+		spin_lock_bh(&priv->ring[ring].egress_lock);
+		list_add_tail(&request->list, &priv->ring[ring].list);
+		spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+		cdesc += commands;
+		rdesc += results;
+	} while (nreq++ < EIP197_MAX_BATCH_SZ);
+
+finalize:
+	if (nreq == EIP197_MAX_BATCH_SZ)
+		priv->ring[ring].need_dequeue = true;
+	else if (!nreq)
+		return;
+
+	spin_lock_bh(&priv->ring[ring].lock);
+
+	/* Configure when we want an interrupt */
+	writel(EIP197_HIA_RDR_THRESH_PKT_MODE |
+	       EIP197_HIA_RDR_THRESH_PROC_PKT(nreq),
+	       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_THRESH);
+
+	/* let the RDR know we have pending descriptors */
+	writel((rdesc * priv->config.rd_offset) << 2,
+	       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PREP_COUNT);
+
+	/* let the CDR know we have pending descriptors */
+	writel((cdesc * priv->config.cd_offset) << 2,
+	       priv->base + EIP197_HIA_CDR(ring) + EIP197_HIA_xDR_PREP_COUNT);
+
+	spin_unlock_bh(&priv->ring[ring].lock);
+}
+
+void safexcel_free_context(struct safexcel_crypto_priv *priv,
+			   struct crypto_async_request *req,
+			   int result_sz)
+{
+	struct safexcel_context *ctx = crypto_tfm_ctx(req->tfm);
+
+	if (ctx->result_dma)
+		dma_unmap_single(priv->dev, ctx->result_dma, result_sz,
+				 DMA_FROM_DEVICE);
+
+	if (ctx->cache) {
+		dma_unmap_single(priv->dev, ctx->cache_dma, ctx->cache_sz,
+				 DMA_TO_DEVICE);
+		kfree(ctx->cache);
+		ctx->cache = NULL;
+		ctx->cache_sz = 0;
+	}
+}
+
+void safexcel_complete(struct safexcel_crypto_priv *priv, int ring)
+{
+	struct safexcel_command_desc *cdesc;
+
+	/* Acknowledge the command descriptors */
+	do {
+		cdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].cdr);
+		if (IS_ERR(cdesc)) {
+			dev_err(priv->dev,
+				"Could not retrieve the command descriptor\n");
+			return;
+		}
+	} while (!cdesc->last_seg);
+}
+
+void safexcel_inv_complete(struct crypto_async_request *req, int error)
+{
+	struct safexcel_inv_result *result = req->data;
+
+	if (error == -EINPROGRESS)
+		return;
+
+	result->error = error;
+	complete(&result->completion);
+}
+
+int safexcel_invalidate_cache(struct crypto_async_request *async,
+			      struct safexcel_context *ctx,
+			      struct safexcel_crypto_priv *priv,
+			      dma_addr_t ctxr_dma, int ring,
+			      struct safexcel_request *request)
+{
+	struct safexcel_command_desc *cdesc;
+	struct safexcel_result_desc *rdesc;
+	int ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	/* Prepare command descriptor */
+	cdesc = safexcel_add_cdesc(priv, ring, true, true, 0, 0, 0, ctxr_dma);
+	if (IS_ERR(cdesc)) {
+		ret = PTR_ERR(cdesc);
+		goto unlock;
+	}
+
+	cdesc->control_data.type = EIP197_TYPE_EXTENDED;
+	cdesc->control_data.options = 0;
+	cdesc->control_data.refresh = 0;
+	cdesc->control_data.control0 = CONTEXT_CONTROL_INV_TR;
+
+	/* Prepare result descriptor */
+	rdesc = safexcel_add_rdesc(priv, ring, true, true, 0, 0);
+
+	if (IS_ERR(rdesc)) {
+		ret = PTR_ERR(rdesc);
+		goto cdesc_rollback;
+	}
+
+	request->req = async;
+	goto unlock;
+
+cdesc_rollback:
+	safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
+
+unlock:
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	return ret;
+}
+
+static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv *priv,
+						     int ring)
+{
+	struct safexcel_request *sreq;
+	struct safexcel_context *ctx;
+	int ret, i, nreq, ndesc = 0;
+	bool should_complete;
+
+	nreq = readl(priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PROC_COUNT);
+	nreq >>= 24;
+	nreq &= GENMASK(6, 0);
+	if (!nreq)
+		return;
+
+	for (i = 0; i < nreq; i++) {
+		spin_lock_bh(&priv->ring[ring].egress_lock);
+		sreq = list_first_entry(&priv->ring[ring].list,
+					struct safexcel_request, list);
+		list_del(&sreq->list);
+		spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+		ctx = crypto_tfm_ctx(sreq->req->tfm);
+		ndesc = ctx->handle_result(priv, ring, sreq->req,
+					   &should_complete, &ret);
+		if (ndesc < 0) {
+			dev_err(priv->dev, "failed to handle result (%d)", ndesc);
+			return;
+		}
+
+		writel(EIP197_xDR_PROC_xD_PKT(1) |
+		       EIP197_xDR_PROC_xD_COUNT(ndesc * priv->config.rd_offset),
+		       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_PROC_COUNT);
+
+		if (should_complete) {
+			local_bh_disable();
+			sreq->req->complete(sreq->req, ret);
+			local_bh_enable();
+		}
+
+		kfree(sreq);
+	}
+}
+
+static void safexcel_handle_result_work(struct work_struct *work)
+{
+	struct safexcel_work_data *data =
+			container_of(work, struct safexcel_work_data, work);
+	struct safexcel_crypto_priv *priv = data->priv;
+
+	safexcel_handle_result_descriptor(priv, data->ring);
+
+	if (priv->ring[data->ring].need_dequeue)
+		safexcel_dequeue(data->priv, data->ring);
+}
+
+struct safexcel_ring_irq_data {
+	struct safexcel_crypto_priv *priv;
+	int ring;
+};
+
+static irqreturn_t safexcel_irq_ring(int irq, void *data)
+{
+	struct safexcel_ring_irq_data *irq_data = data;
+	struct safexcel_crypto_priv *priv = irq_data->priv;
+	int ring = irq_data->ring;
+	u32 status, stat;
+
+	status = readl(priv->base + EIP197_HIA_AIC_R_ENABLED_STAT(ring));
+	if (!status)
+		return IRQ_NONE;
+
+	/* RDR interrupts */
+	if (status & EIP197_RDR_IRQ(ring)) {
+		stat = readl(priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_STAT);
+
+		if (unlikely(stat & EIP197_xDR_ERR)) {
+			/*
+			 * Fatal error, the RDR is unusable and must be
+			 * reinitialized. This should not happen under
+			 * normal circumstances.
+			 */
+			dev_err(priv->dev, "RDR: fatal error.");
+		} else if (likely(stat & EIP197_xDR_THRESH)) {
+			queue_work(priv->ring[ring].workqueue, &priv->ring[ring].work_data.work);
+		}
+
+		/* ACK the interrupts */
+		writel(stat & 0xff,
+		       priv->base + EIP197_HIA_RDR(ring) + EIP197_HIA_xDR_STAT);
+	}
+
+	/* ACK the interrupts */
+	writel(status, priv->base + EIP197_HIA_AIC_R_ACK(ring));
+
+	return IRQ_HANDLED;
+}
+
+static int safexcel_request_ring_irq(struct platform_device *pdev, const char *name,
+				     irq_handler_t handler,
+				     struct safexcel_ring_irq_data *ring_irq_priv)
+{
+	int ret, irq = platform_get_irq_byname(pdev, name);
+
+	if (irq < 0) {
+		dev_err(&pdev->dev, "unable to get IRQ '%s'\n", name);
+		return irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, handler, 0,
+			       dev_name(&pdev->dev), ring_irq_priv);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to request IRQ %d\n", irq);
+		return ret;
+	}
+
+	return irq;
+}
+
+static struct safexcel_alg_template *safexcel_algs[] = {
+	&safexcel_alg_ecb_aes,
+	&safexcel_alg_cbc_aes,
+	&safexcel_alg_sha1,
+	&safexcel_alg_sha224,
+	&safexcel_alg_sha256,
+	&safexcel_alg_hmac_sha1,
+};
+
+static int safexcel_register_algorithms(struct safexcel_crypto_priv *priv)
+{
+	int i, j, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
+		safexcel_algs[i]->priv = priv;
+
+		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
+			ret = crypto_register_skcipher(&safexcel_algs[i]->alg.skcipher);
+		else
+			ret = crypto_register_ahash(&safexcel_algs[i]->alg.ahash);
+
+		if (ret)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	for (j = 0; j < i; j++) {
+		if (safexcel_algs[j]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
+			crypto_unregister_skcipher(&safexcel_algs[j]->alg.skcipher);
+		else
+			crypto_unregister_ahash(&safexcel_algs[j]->alg.ahash);
+	}
+
+	return ret;
+}
+
+static void safexcel_unregister_algorithms(struct safexcel_crypto_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
+		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
+			crypto_unregister_skcipher(&safexcel_algs[i]->alg.skcipher);
+		else
+			crypto_unregister_ahash(&safexcel_algs[i]->alg.ahash);
+	}
+}
+
+static void safexcel_configure(struct safexcel_crypto_priv *priv)
+{
+	u32 val, mask;
+
+	val = readl(priv->base + EIP197_HIA_OPTIONS);
+	val = (val & GENMASK(27, 25)) >> 25;
+	mask = BIT(val) - 1;
+
+	val = readl(priv->base + EIP197_HIA_OPTIONS);
+	priv->config.rings = min_t(u32, val & GENMASK(3, 0), max_rings);
+
+	priv->config.cd_size = (sizeof(struct safexcel_command_desc) / sizeof(u32));
+	priv->config.cd_offset = (priv->config.cd_size + mask) & ~mask;
+
+	priv->config.rd_size = (sizeof(struct safexcel_result_desc) / sizeof(u32));
+	priv->config.rd_offset = (priv->config.rd_size + mask) & ~mask;
+}
+
+static int safexcel_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct safexcel_crypto_priv *priv;
+	u64 dma_mask;
+	int i, ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->base)) {
+		dev_err(dev, "failed to get resource\n");
+		return PTR_ERR(priv->base);
+	}
+
+	priv->clk = of_clk_get(dev->of_node, 0);
+	if (!IS_ERR(priv->clk)) {
+		ret = clk_prepare_enable(priv->clk);
+		if (ret) {
+			dev_err(dev, "unable to enable clk (%d)\n", ret);
+			return ret;
+		}
+	} else {
+		/* The clock isn't mandatory */
+		if (PTR_ERR(priv->clk) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+	}
+
+	if (of_property_read_u64(dev->of_node, "dma-mask", &dma_mask))
+		dma_mask = DMA_BIT_MASK(64);
+	ret = dma_set_mask_and_coherent(dev, dma_mask);
+	if (ret)
+		goto err_clk;
+
+	priv->context_pool = dmam_pool_create("safexcel-context", dev,
+					      sizeof(struct safexcel_context_record),
+					      1, 0);
+	if (!priv->context_pool) {
+		ret = -ENOMEM;
+		goto err_clk;
+	}
+
+	safexcel_configure(priv);
+
+	for (i = 0; i < priv->config.rings; i++) {
+		char irq_name[6] = {0}; /* "ringX\0" */
+		char wq_name[9] = {0}; /* "wq_ringX\0" */
+		int irq;
+		struct safexcel_ring_irq_data *ring_irq;
+
+		ret = safexcel_init_ring_descriptors(priv,
+						     &priv->ring[i].cdr,
+						     &priv->ring[i].rdr);
+		if (ret)
+			goto err_clk;
+
+		ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL);
+		if (!ring_irq) {
+			ret = -ENOMEM;
+			goto err_clk;
+		}
+
+		ring_irq->priv = priv;
+		ring_irq->ring = i;
+
+		snprintf(irq_name, 6, "ring%d", i);
+		irq = safexcel_request_ring_irq(pdev, irq_name, safexcel_irq_ring,
+						ring_irq);
+
+		if (irq < 0)
+			goto err_clk;
+
+		priv->ring[i].work_data.priv = priv;
+		priv->ring[i].work_data.ring = i;
+		INIT_WORK(&priv->ring[i].work_data.work, safexcel_handle_result_work);
+
+		snprintf(wq_name, 9, "wq_ring%d", i);
+		priv->ring[i].workqueue = create_singlethread_workqueue(wq_name);
+		if (!priv->ring[i].workqueue) {
+			ret = -ENOMEM;
+			goto err_clk;
+		}
+
+		crypto_init_queue(&priv->ring[i].queue,
+				  EIP197_DEFAULT_RING_SIZE);
+
+		INIT_LIST_HEAD(&priv->ring[i].list);
+		spin_lock_init(&priv->ring[i].lock);
+		spin_lock_init(&priv->ring[i].egress_lock);
+		spin_lock_init(&priv->ring[i].queue_lock);
+	}
+
+	platform_set_drvdata(pdev, priv);
+	atomic_set(&priv->ring_used, 0);
+
+	ret = safexcel_hw_init(priv);
+	if (ret) {
+		dev_err(dev, "EIP h/w init failed (%d)\n", ret);
+		goto err_clk;
+	}
+
+	ret = safexcel_register_algorithms(priv);
+	if (ret) {
+		dev_err(dev, "Failed to register algorithms (%d)\n", ret);
+		goto err_clk;
+	}
+
+	return 0;
+
+err_clk:
+	clk_disable_unprepare(priv->clk);
+	return ret;
+}
+
+
+static int safexcel_remove(struct platform_device *pdev)
+{
+	struct safexcel_crypto_priv *priv = platform_get_drvdata(pdev);
+	int i;
+
+	safexcel_unregister_algorithms(priv);
+	clk_disable_unprepare(priv->clk);
+
+	for (i = 0; i < priv->config.rings; i++)
+		destroy_workqueue(priv->ring[i].workqueue);
+
+	return 0;
+}
+
+static const struct of_device_id safexcel_of_match_table[] = {
+	{ .compatible = "inside-secure,safexcel-eip197" },
+	{},
+};
+
+
+static struct platform_driver  crypto_safexcel = {
+	.probe		= safexcel_probe,
+	.remove		= safexcel_remove,
+	.driver		= {
+		.name	= "crypto-safexcel",
+		.of_match_table = safexcel_of_match_table,
+	},
+};
+module_platform_driver(crypto_safexcel);
+
+MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>");
+MODULE_AUTHOR("Ofer Heifetz <oferh@marvell.com>");
+MODULE_AUTHOR("Igal Liberman <igall@marvell.com>");
+MODULE_DESCRIPTION("Support for SafeXcel cryptographic engine EIP197");
+MODULE_LICENSE("GPL v2");

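Within safexcel.c, safexcel_configure() reads the host data width exponent from EIP197_HIA_OPTIONS, builds mask = BIT(hdw) - 1 and then rounds the command/result descriptor sizes up with (size + mask) & ~mask so that cd_offset/rd_offset become multiples of the interface width. The standalone snippet below demonstrates only that power-of-two round-up; the sizes used are arbitrary examples, not values read from hardware.

#include <stdio.h>
#include <stdint.h>

/* Round size (in 32-bit words) up to the next multiple of 2^hdw words. */
static uint32_t round_up_pow2(uint32_t size, uint32_t hdw)
{
	uint32_t mask = (1u << hdw) - 1;

	return (size + mask) & ~mask;
}

int main(void)
{
	/* e.g. a 6-word descriptor on a 2^2 = 4-word interface is padded to 8 */
	printf("offset(6, hdw=2) = %u\n", round_up_pow2(6, 2));
	/* an already aligned size is left unchanged */
	printf("offset(8, hdw=2) = %u\n", round_up_pow2(8, 2));
	return 0;
}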
+ 574 - 0
drivers/crypto/inside-secure/safexcel.h

@@ -0,0 +1,574 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __SAFEXCEL_H__
+#define __SAFEXCEL_H__
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/skcipher.h>
+
+#define EIP197_HIA_VERSION_LE			0xca35
+#define EIP197_HIA_VERSION_BE			0x35ca
+
+/* Static configuration */
+#define EIP197_DEFAULT_RING_SIZE		64
+#define EIP197_MAX_TOKENS			5
+#define EIP197_MAX_RINGS			4
+#define EIP197_FETCH_COUNT			1
+#define EIP197_MAX_BATCH_SZ			EIP197_DEFAULT_RING_SIZE
+
+#define EIP197_GFP_FLAGS(base)	((base).flags & CRYPTO_TFM_REQ_MAY_SLEEP ? \
+				 GFP_KERNEL : GFP_ATOMIC)
+
+/* CDR/RDR register offsets */
+#define EIP197_HIA_xDR_OFF(r)			(0x80000 + (r) * 0x1000)
+#define EIP197_HIA_CDR(r)			(EIP197_HIA_xDR_OFF(r))
+#define EIP197_HIA_RDR(r)			(EIP197_HIA_xDR_OFF(r) + 0x800)
+#define EIP197_HIA_xDR_RING_BASE_ADDR_LO	0x0
+#define EIP197_HIA_xDR_RING_BASE_ADDR_HI	0x4
+#define EIP197_HIA_xDR_RING_SIZE		0x18
+#define EIP197_HIA_xDR_DESC_SIZE		0x1c
+#define EIP197_HIA_xDR_CFG			0x20
+#define EIP197_HIA_xDR_DMA_CFG			0x24
+#define EIP197_HIA_xDR_THRESH			0x28
+#define EIP197_HIA_xDR_PREP_COUNT		0x2c
+#define EIP197_HIA_xDR_PROC_COUNT		0x30
+#define EIP197_HIA_xDR_PREP_PNTR		0x34
+#define EIP197_HIA_xDR_PROC_PNTR		0x38
+#define EIP197_HIA_xDR_STAT			0x3c
+
+/* register offsets */
+#define EIP197_HIA_DFE_CFG			0x8c000
+#define EIP197_HIA_DFE_THR_CTRL			0x8c040
+#define EIP197_HIA_DFE_THR_STAT			0x8c044
+#define EIP197_HIA_DSE_CFG			0x8d000
+#define EIP197_HIA_DSE_THR_CTRL			0x8d040
+#define EIP197_HIA_DSE_THR_STAT			0x8d044
+#define EIP197_HIA_RA_PE_CTRL			0x90010
+#define EIP197_HIA_RA_PE_STAT			0x90014
+#define EIP197_HIA_AIC_R_OFF(r)			((r) * 0x1000)
+#define EIP197_HIA_AIC_R_ENABLE_CTRL(r)		(0x9e808 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ENABLED_STAT(r)	(0x9e810 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ACK(r)			(0x9e810 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_R_ENABLE_CLR(r)		(0x9e814 - EIP197_HIA_AIC_R_OFF(r))
+#define EIP197_HIA_AIC_G_ENABLE_CTRL		0x9f808
+#define EIP197_HIA_AIC_G_ENABLED_STAT		0x9f810
+#define EIP197_HIA_AIC_G_ACK			0x9f810
+#define EIP197_HIA_MST_CTRL			0x9fff4
+#define EIP197_HIA_OPTIONS			0x9fff8
+#define EIP197_HIA_VERSION			0x9fffc
+#define EIP197_PE_IN_DBUF_THRES			0xa0000
+#define EIP197_PE_IN_TBUF_THRES			0xa0100
+#define EIP197_PE_ICE_SCRATCH_RAM		0xa0800
+#define EIP197_PE_ICE_PUE_CTRL			0xa0c80
+#define EIP197_PE_ICE_SCRATCH_CTRL		0xa0d04
+#define EIP197_PE_ICE_FPP_CTRL			0xa0d80
+#define EIP197_PE_ICE_RAM_CTRL			0xa0ff0
+#define EIP197_PE_EIP96_FUNCTION_EN		0xa1004
+#define EIP197_PE_EIP96_CONTEXT_CTRL		0xa1008
+#define EIP197_PE_EIP96_CONTEXT_STAT		0xa100c
+#define EIP197_PE_OUT_DBUF_THRES		0xa1c00
+#define EIP197_PE_OUT_TBUF_THRES		0xa1d00
+#define EIP197_CLASSIFICATION_RAMS		0xe0000
+#define EIP197_TRC_CTRL				0xf0800
+#define EIP197_TRC_LASTRES			0xf0804
+#define EIP197_TRC_REGINDEX			0xf0808
+#define EIP197_TRC_PARAMS			0xf0820
+#define EIP197_TRC_FREECHAIN			0xf0824
+#define EIP197_TRC_PARAMS2			0xf0828
+#define EIP197_TRC_ECCCTRL			0xf0830
+#define EIP197_TRC_ECCSTAT			0xf0834
+#define EIP197_TRC_ECCADMINSTAT			0xf0838
+#define EIP197_TRC_ECCDATASTAT			0xf083c
+#define EIP197_TRC_ECCDATA			0xf0840
+#define EIP197_CS_RAM_CTRL			0xf7ff0
+#define EIP197_MST_CTRL				0xffff4
+
+/* EIP197_HIA_xDR_DESC_SIZE */
+#define EIP197_xDR_DESC_MODE_64BIT		BIT(31)
+
+/* EIP197_HIA_xDR_DMA_CFG */
+#define EIP197_HIA_xDR_WR_RES_BUF		BIT(22)
+#define EIP197_HIA_xDR_WR_CTRL_BUG		BIT(23)
+#define EIP197_HIA_xDR_WR_OWN_BUF		BIT(24)
+#define EIP197_HIA_xDR_CFG_WR_CACHE(n)		(((n) & 0x7) << 25)
+#define EIP197_HIA_xDR_CFG_RD_CACHE(n)		(((n) & 0x7) << 29)
+
+/* EIP197_HIA_CDR_THRESH */
+#define EIP197_HIA_CDR_THRESH_PROC_PKT(n)	(n)
+#define EIP197_HIA_CDR_THRESH_PROC_MODE		BIT(22)
+#define EIP197_HIA_CDR_THRESH_PKT_MODE		BIT(23)
+#define EIP197_HIA_CDR_THRESH_TIMEOUT(n)	((n) << 24) /* x256 clk cycles */
+
+/* EIP197_HIA_RDR_THRESH */
+#define EIP197_HIA_RDR_THRESH_PROC_PKT(n)	(n)
+#define EIP197_HIA_RDR_THRESH_PKT_MODE		BIT(23)
+#define EIP197_HIA_RDR_THRESH_TIMEOUT(n)	((n) << 24) /* x256 clk cycles */
+
+/* EIP197_HIA_xDR_PREP_COUNT */
+#define EIP197_xDR_PREP_CLR_COUNT		BIT(31)
+
+/* EIP197_HIA_xDR_PROC_COUNT */
+#define EIP197_xDR_PROC_xD_COUNT(n)		((n) << 2)
+#define EIP197_xDR_PROC_xD_PKT(n)		((n) << 24)
+#define EIP197_xDR_PROC_CLR_COUNT		BIT(31)
+
+/* EIP197_HIA_xDR_STAT */
+#define EIP197_xDR_DMA_ERR			BIT(0)
+#define EIP197_xDR_PREP_CMD_THRES		BIT(1)
+#define EIP197_xDR_ERR				BIT(2)
+#define EIP197_xDR_THRESH			BIT(4)
+#define EIP197_xDR_TIMEOUT			BIT(5)
+
+#define EIP197_HIA_RA_PE_CTRL_RESET		BIT(31)
+#define EIP197_HIA_RA_PE_CTRL_EN		BIT(30)
+
+/* EIP197_HIA_AIC_R_ENABLE_CTRL */
+#define EIP197_CDR_IRQ(n)			BIT((n) * 2)
+#define EIP197_RDR_IRQ(n)			BIT((n) * 2 + 1)
+
+/* EIP197_HIA_DFE/DSE_CFG */
+#define EIP197_HIA_DxE_CFG_MIN_DATA_SIZE(n)	((n) << 0)
+#define EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(n)	(((n) & 0x7) << 4)
+#define EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(n)	((n) << 8)
+#define EIP197_HIA_DSE_CFG_ALLWAYS_BUFFERABLE	GENMASK(15, 14)
+#define EIP197_HIA_DxE_CFG_MIN_CTRL_SIZE(n)	((n) << 16)
+#define EIP197_HIA_DxE_CFG_CTRL_CACHE_CTRL(n)	(((n) & 0x7) << 20)
+#define EIP197_HIA_DxE_CFG_MAX_CTRL_SIZE(n)	((n) << 24)
+#define EIP197_HIA_DFE_CFG_DIS_DEBUG		(BIT(31) | BIT(29))
+#define EIP197_HIA_DSE_CFG_EN_SINGLE_WR		BIT(29)
+#define EIP197_HIA_DSE_CFG_DIS_DEBUG		BIT(31)
+
+/* EIP197_HIA_DFE/DSE_THR_CTRL */
+#define EIP197_DxE_THR_CTRL_EN			BIT(30)
+#define EIP197_DxE_THR_CTRL_RESET_PE		BIT(31)
+
+/* EIP197_HIA_AIC_G_ENABLED_STAT */
+#define EIP197_G_IRQ_DFE(n)			BIT((n) << 1)
+#define EIP197_G_IRQ_DSE(n)			BIT(((n) << 1) + 1)
+#define EIP197_G_IRQ_RING			BIT(16)
+#define EIP197_G_IRQ_PE(n)			BIT((n) + 20)
+
+/* EIP197_HIA_MST_CTRL */
+#define RD_CACHE_3BITS				0x5
+#define WR_CACHE_3BITS				0x3
+#define RD_CACHE_4BITS				(RD_CACHE_3BITS << 1 | BIT(0))
+#define WR_CACHE_4BITS				(WR_CACHE_3BITS << 1 | BIT(0))
+#define EIP197_MST_CTRL_RD_CACHE(n)		(((n) & 0xf) << 0)
+#define EIP197_MST_CTRL_WD_CACHE(n)		(((n) & 0xf) << 4)
+#define EIP197_MST_CTRL_BYTE_SWAP		BIT(24)
+#define EIP197_MST_CTRL_NO_BYTE_SWAP		BIT(25)
+
+/* EIP197_PE_IN_DBUF/TBUF_THRES */
+#define EIP197_PE_IN_xBUF_THRES_MIN(n)		((n) << 8)
+#define EIP197_PE_IN_xBUF_THRES_MAX(n)		((n) << 12)
+
+/* EIP197_PE_OUT_DBUF_THRES */
+#define EIP197_PE_OUT_DBUF_THRES_MIN(n)		((n) << 0)
+#define EIP197_PE_OUT_DBUF_THRES_MAX(n)		((n) << 4)
+
+/* EIP197_PE_ICE_SCRATCH_CTRL */
+#define EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_TIMER		BIT(2)
+#define EIP197_PE_ICE_SCRATCH_CTRL_TIMER_EN		BIT(3)
+#define EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS	BIT(24)
+#define EIP197_PE_ICE_SCRATCH_CTRL_SCRATCH_ACCESS	BIT(25)
+
+/* EIP197_PE_ICE_SCRATCH_RAM */
+#define EIP197_NUM_OF_SCRATCH_BLOCKS		32
+
+/* EIP197_PE_ICE_PUE/FPP_CTRL */
+#define EIP197_PE_ICE_x_CTRL_SW_RESET			BIT(0)
+#define EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR		BIT(14)
+#define EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR		BIT(15)
+
+/* EIP197_PE_ICE_RAM_CTRL */
+#define EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN	BIT(0)
+#define EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN	BIT(1)
+
+/* EIP197_PE_EIP96_FUNCTION_EN */
+#define EIP197_FUNCTION_RSVD			(BIT(6) | BIT(15) | BIT(20) | BIT(23))
+#define EIP197_PROTOCOL_HASH_ONLY		BIT(0)
+#define EIP197_PROTOCOL_ENCRYPT_ONLY		BIT(1)
+#define EIP197_PROTOCOL_HASH_ENCRYPT		BIT(2)
+#define EIP197_PROTOCOL_HASH_DECRYPT		BIT(3)
+#define EIP197_PROTOCOL_ENCRYPT_HASH		BIT(4)
+#define EIP197_PROTOCOL_DECRYPT_HASH		BIT(5)
+#define EIP197_ALG_ARC4				BIT(7)
+#define EIP197_ALG_AES_ECB			BIT(8)
+#define EIP197_ALG_AES_CBC			BIT(9)
+#define EIP197_ALG_AES_CTR_ICM			BIT(10)
+#define EIP197_ALG_AES_OFB			BIT(11)
+#define EIP197_ALG_AES_CFB			BIT(12)
+#define EIP197_ALG_DES_ECB			BIT(13)
+#define EIP197_ALG_DES_CBC			BIT(14)
+#define EIP197_ALG_DES_OFB			BIT(16)
+#define EIP197_ALG_DES_CFB			BIT(17)
+#define EIP197_ALG_3DES_ECB			BIT(18)
+#define EIP197_ALG_3DES_CBC			BIT(19)
+#define EIP197_ALG_3DES_OFB			BIT(21)
+#define EIP197_ALG_3DES_CFB			BIT(22)
+#define EIP197_ALG_MD5				BIT(24)
+#define EIP197_ALG_HMAC_MD5			BIT(25)
+#define EIP197_ALG_SHA1				BIT(26)
+#define EIP197_ALG_HMAC_SHA1			BIT(27)
+#define EIP197_ALG_SHA2				BIT(28)
+#define EIP197_ALG_HMAC_SHA2			BIT(29)
+#define EIP197_ALG_AES_XCBC_MAC			BIT(30)
+#define EIP197_ALG_GCM_HASH			BIT(31)
+
+/* EIP197_PE_EIP96_CONTEXT_CTRL */
+#define EIP197_CONTEXT_SIZE(n)			(n)
+#define EIP197_ADDRESS_MODE			BIT(8)
+#define EIP197_CONTROL_MODE			BIT(9)
+
+/* Context Control */
+struct safexcel_context_record {
+	u32 control0;
+	u32 control1;
+
+	__le32 data[12];
+} __packed;
+
+/* control0 */
+#define CONTEXT_CONTROL_TYPE_NULL_OUT		0x0
+#define CONTEXT_CONTROL_TYPE_NULL_IN		0x1
+#define CONTEXT_CONTROL_TYPE_HASH_OUT		0x2
+#define CONTEXT_CONTROL_TYPE_HASH_IN		0x3
+#define CONTEXT_CONTROL_TYPE_CRYPTO_OUT		0x4
+#define CONTEXT_CONTROL_TYPE_CRYPTO_IN		0x5
+#define CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT	0x6
+#define CONTEXT_CONTROL_TYPE_DECRYPT_HASH_IN	0x7
+#define CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT	0x14
+#define CONTEXT_CONTROL_TYPE_HASH_DECRYPT_OUT	0x15
+#define CONTEXT_CONTROL_RESTART_HASH		BIT(4)
+#define CONTEXT_CONTROL_NO_FINISH_HASH		BIT(5)
+#define CONTEXT_CONTROL_SIZE(n)			((n) << 8)
+#define CONTEXT_CONTROL_KEY_EN			BIT(16)
+#define CONTEXT_CONTROL_CRYPTO_ALG_AES128	(0x5 << 17)
+#define CONTEXT_CONTROL_CRYPTO_ALG_AES192	(0x6 << 17)
+#define CONTEXT_CONTROL_CRYPTO_ALG_AES256	(0x7 << 17)
+#define CONTEXT_CONTROL_DIGEST_PRECOMPUTED	(0x1 << 21)
+#define CONTEXT_CONTROL_DIGEST_HMAC		(0x3 << 21)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA1		(0x2 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA224	(0x4 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_SHA256	(0x3 << 23)
+#define CONTEXT_CONTROL_INV_FR			(0x5 << 24)
+#define CONTEXT_CONTROL_INV_TR			(0x6 << 24)
+
+/* control1 */
+#define CONTEXT_CONTROL_CRYPTO_MODE_ECB		(0 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_CBC		(1 << 0)
+#define CONTEXT_CONTROL_IV0			BIT(5)
+#define CONTEXT_CONTROL_IV1			BIT(6)
+#define CONTEXT_CONTROL_IV2			BIT(7)
+#define CONTEXT_CONTROL_IV3			BIT(8)
+#define CONTEXT_CONTROL_DIGEST_CNT		BIT(9)
+#define CONTEXT_CONTROL_COUNTER_MODE		BIT(10)
+#define CONTEXT_CONTROL_HASH_STORE		BIT(19)
+
+/* EIP197_CS_RAM_CTRL */
+#define EIP197_TRC_ENABLE_0			BIT(4)
+#define EIP197_TRC_ENABLE_1			BIT(5)
+#define EIP197_TRC_ENABLE_2			BIT(6)
+#define EIP197_TRC_ENABLE_MASK			GENMASK(6, 4)
+
+/* EIP197_TRC_PARAMS */
+#define EIP197_TRC_PARAMS_SW_RESET		BIT(0)
+#define EIP197_TRC_PARAMS_DATA_ACCESS		BIT(2)
+#define EIP197_TRC_PARAMS_HTABLE_SZ(x)		((x) << 4)
+#define EIP197_TRC_PARAMS_BLK_TIMER_SPEED(x)	((x) << 10)
+#define EIP197_TRC_PARAMS_RC_SZ_LARGE(n)	((n) << 18)
+
+/* EIP197_TRC_FREECHAIN */
+#define EIP197_TRC_FREECHAIN_HEAD_PTR(p)	(p)
+#define EIP197_TRC_FREECHAIN_TAIL_PTR(p)	((p) << 16)
+
+/* EIP197_TRC_PARAMS2 */
+#define EIP197_TRC_PARAMS2_HTABLE_PTR(p)	(p)
+#define EIP197_TRC_PARAMS2_RC_SZ_SMALL(n)	((n) << 18)
+
+/* Cache helpers */
+#define EIP197_CS_RC_MAX			52
+#define EIP197_CS_RC_SIZE			(4 * sizeof(u32))
+#define EIP197_CS_RC_NEXT(x)			(x)
+#define EIP197_CS_RC_PREV(x)			((x) << 10)
+#define EIP197_RC_NULL				0x3ff
+#define EIP197_CS_TRC_REC_WC			59
+#define EIP197_CS_TRC_LG_REC_WC			73
+
+/* Result data */
+struct result_data_desc {
+	u32 packet_length:17;
+	u32 error_code:15;
+
+	u8 bypass_length:4;
+	u8 e15:1;
+	u16 rsvd0;
+	u8 hash_bytes:1;
+	u8 hash_length:6;
+	u8 generic_bytes:1;
+	u8 checksum:1;
+	u8 next_header:1;
+	u8 length:1;
+
+	u16 application_id;
+	u16 rsvd1;
+
+	u32 rsvd2;
+} __packed;
+
+
+/* Basic Result Descriptor format */
+struct safexcel_result_desc {
+	u32 particle_size:17;
+	u8 rsvd0:3;
+	u8 descriptor_overflow:1;
+	u8 buffer_overflow:1;
+	u8 last_seg:1;
+	u8 first_seg:1;
+	u16 result_size:8;
+
+	u32 rsvd1;
+
+	u32 data_lo;
+	u32 data_hi;
+
+	struct result_data_desc result_data;
+} __packed;
+
+struct safexcel_token {
+	u32 packet_length:17;
+	u8 stat:2;
+	u16 instructions:9;
+	u8 opcode:4;
+} __packed;
+
+#define EIP197_TOKEN_STAT_LAST_HASH		BIT(0)
+#define EIP197_TOKEN_STAT_LAST_PACKET		BIT(1)
+#define EIP197_TOKEN_OPCODE_DIRECTION		0x0
+#define EIP197_TOKEN_OPCODE_INSERT		0x2
+#define EIP197_TOKEN_OPCODE_NOOP		EIP197_TOKEN_OPCODE_INSERT
+#define EIP197_TOKEN_OPCODE_BYPASS		GENMASK(3, 0)
+
+static inline void eip197_noop_token(struct safexcel_token *token)
+{
+	token->opcode = EIP197_TOKEN_OPCODE_NOOP;
+	token->packet_length = BIT(2);
+}
+
+/* Instructions */
+#define EIP197_TOKEN_INS_INSERT_HASH_DIGEST	0x1c
+#define EIP197_TOKEN_INS_TYPE_OUTPUT		BIT(5)
+#define EIP197_TOKEN_INS_TYPE_HASH		BIT(6)
+#define EIP197_TOKEN_INS_TYPE_CRYTO		BIT(7)
+#define EIP197_TOKEN_INS_LAST			BIT(8)
+
+/* Processing Engine Control Data  */
+struct safexcel_control_data_desc {
+	u32 packet_length:17;
+	u16 options:13;
+	u8 type:2;
+
+	u16 application_id;
+	u16 rsvd;
+
+	u8 refresh:2;
+	u32 context_lo:30;
+	u32 context_hi;
+
+	u32 control0;
+	u32 control1;
+
+	u32 token[EIP197_MAX_TOKENS];
+} __packed;
+
+#define EIP197_OPTION_MAGIC_VALUE	BIT(0)
+#define EIP197_OPTION_64BIT_CTX		BIT(1)
+#define EIP197_OPTION_CTX_CTRL_IN_CMD	BIT(8)
+#define EIP197_OPTION_4_TOKEN_IV_CMD	GENMASK(11, 9)
+
+#define EIP197_TYPE_EXTENDED		0x3
+
+/* Basic Command Descriptor format */
+struct safexcel_command_desc {
+	u32 particle_size:17;
+	u8 rsvd0:5;
+	u8 last_seg:1;
+	u8 first_seg:1;
+	u16 additional_cdata_size:8;
+
+	u32 rsvd1;
+
+	u32 data_lo;
+	u32 data_hi;
+
+	struct safexcel_control_data_desc control_data;
+} __packed;
+
+/*
+ * Internal structures & functions
+ */
+
+enum eip197_fw {
+	FW_IFPP = 0,
+	FW_IPUE,
+	FW_NB
+};
+
+struct safexcel_ring {
+	void *base;
+	void *base_end;
+	dma_addr_t base_dma;
+
+	/* write and read pointers */
+	void *write;
+	void *read;
+
+	/* number of elements used in the ring */
+	unsigned nr;
+	unsigned offset;
+};
+
+enum safexcel_alg_type {
+	SAFEXCEL_ALG_TYPE_SKCIPHER,
+	SAFEXCEL_ALG_TYPE_AHASH,
+};
+
+struct safexcel_request {
+	struct list_head list;
+	struct crypto_async_request *req;
+};
+
+struct safexcel_config {
+	u32 rings;
+
+	u32 cd_size;
+	u32 cd_offset;
+
+	u32 rd_size;
+	u32 rd_offset;
+};
+
+struct safexcel_work_data {
+	struct work_struct work;
+	struct safexcel_crypto_priv *priv;
+	int ring;
+};
+
+struct safexcel_crypto_priv {
+	void __iomem *base;
+	struct device *dev;
+	struct clk *clk;
+	struct safexcel_config config;
+
+	/* context DMA pool */
+	struct dma_pool *context_pool;
+
+	atomic_t ring_used;
+
+	struct {
+		spinlock_t lock;
+		spinlock_t egress_lock;
+
+		struct list_head list;
+		struct workqueue_struct *workqueue;
+		struct safexcel_work_data work_data;
+
+		/* command/result rings */
+		struct safexcel_ring cdr;
+		struct safexcel_ring rdr;
+
+		/* queue */
+		struct crypto_queue queue;
+		spinlock_t queue_lock;
+		bool need_dequeue;
+	} ring[EIP197_MAX_RINGS];
+};
+
+struct safexcel_context {
+	int (*send)(struct crypto_async_request *req, int ring,
+		    struct safexcel_request *request, int *commands,
+		    int *results);
+	int (*handle_result)(struct safexcel_crypto_priv *priv, int ring,
+			     struct crypto_async_request *req, bool *complete,
+			     int *ret);
+	struct safexcel_context_record *ctxr;
+	dma_addr_t ctxr_dma;
+
+	int ring;
+	bool needs_inv;
+	bool exit_inv;
+
+	/* Used for ahash requests */
+	dma_addr_t result_dma;
+	void *cache;
+	dma_addr_t cache_dma;
+	unsigned int cache_sz;
+};
+
+/*
+ * Template structure to describe the algorithms in order to register them.
+ * It also has the purpose to contain our private structure and is actually
+ * the only way I know in this framework to avoid having global pointers...
+ */
+struct safexcel_alg_template {
+	struct safexcel_crypto_priv *priv;
+	enum safexcel_alg_type type;
+	union {
+		struct skcipher_alg skcipher;
+		struct ahash_alg ahash;
+	} alg;
+};
+
+struct safexcel_inv_result {
+	struct completion completion;
+	int error;
+};
+
+void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring);
+void safexcel_complete(struct safexcel_crypto_priv *priv, int ring);
+void safexcel_free_context(struct safexcel_crypto_priv *priv,
+				  struct crypto_async_request *req,
+				  int result_sz);
+int safexcel_invalidate_cache(struct crypto_async_request *async,
+			      struct safexcel_context *ctx,
+			      struct safexcel_crypto_priv *priv,
+			      dma_addr_t ctxr_dma, int ring,
+			      struct safexcel_request *request);
+int safexcel_init_ring_descriptors(struct safexcel_crypto_priv *priv,
+				   struct safexcel_ring *cdr,
+				   struct safexcel_ring *rdr);
+int safexcel_select_ring(struct safexcel_crypto_priv *priv);
+void *safexcel_ring_next_rptr(struct safexcel_crypto_priv *priv,
+			      struct safexcel_ring *ring);
+void safexcel_ring_rollback_wptr(struct safexcel_crypto_priv *priv,
+				 struct safexcel_ring *ring);
+struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *priv,
+						 int ring_id,
+						 bool first, bool last,
+						 dma_addr_t data, u32 len,
+						 u32 full_data_len,
+						 dma_addr_t context);
+struct safexcel_result_desc *safexcel_add_rdesc(struct safexcel_crypto_priv *priv,
+						 int ring_id,
+						bool first, bool last,
+						dma_addr_t data, u32 len);
+void safexcel_inv_complete(struct crypto_async_request *req, int error);
+
+/* available algorithms */
+extern struct safexcel_alg_template safexcel_alg_ecb_aes;
+extern struct safexcel_alg_template safexcel_alg_cbc_aes;
+extern struct safexcel_alg_template safexcel_alg_sha1;
+extern struct safexcel_alg_template safexcel_alg_sha224;
+extern struct safexcel_alg_template safexcel_alg_sha256;
+extern struct safexcel_alg_template safexcel_alg_hmac_sha1;
+
+#endif

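struct safexcel_ring above only carries the bookkeeping (base, base_end, write, read, nr); the helpers declared at the bottom of the header (safexcel_add_cdesc(), safexcel_ring_next_rptr(), safexcel_ring_rollback_wptr(), ...) are implemented in safexcel_ring.c, which is not part of this excerpt. The sketch below is a simplified userspace model of that write-pointer/rollback bookkeeping using a fixed array of ints as "descriptors"; it illustrates the idea, not the driver's implementation.

#include <stdio.h>

#define RING_SIZE 4

struct model_ring {
	int slots[RING_SIZE];
	int write;	/* index of the next free slot */
	int read;	/* index of the next slot to acknowledge */
	int nr;		/* number of slots currently in use */
};

/* Reserve the next free descriptor, or return NULL when the ring is full. */
static int *ring_next_wptr(struct model_ring *r)
{
	int *desc;

	if (r->nr == RING_SIZE)
		return NULL;

	desc = &r->slots[r->write];
	r->write = (r->write + 1) % RING_SIZE;
	r->nr++;
	return desc;
}

/* Give back the most recently reserved descriptor (e.g. after a later failure). */
static void ring_rollback_wptr(struct model_ring *r)
{
	if (!r->nr)
		return;

	r->write = (r->write + RING_SIZE - 1) % RING_SIZE;
	r->nr--;
}

int main(void)
{
	struct model_ring ring = { .write = 0, .read = 0, .nr = 0 };
	int *desc = ring_next_wptr(&ring);

	if (desc)
		*desc = 42;
	ring_rollback_wptr(&ring);
	printf("descriptors in use after rollback: %d\n", ring.nr);
	return 0;
}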
+ 561 - 0
drivers/crypto/inside-secure/safexcel_cipher.c

@@ -0,0 +1,561 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+
+#include <crypto/aes.h>
+#include <crypto/skcipher.h>
+
+#include "safexcel.h"
+
+enum safexcel_cipher_direction {
+	SAFEXCEL_ENCRYPT,
+	SAFEXCEL_DECRYPT,
+};
+
+struct safexcel_cipher_ctx {
+	struct safexcel_context base;
+	struct safexcel_crypto_priv *priv;
+
+	enum safexcel_cipher_direction direction;
+	u32 mode;
+
+	__le32 key[8];
+	unsigned int key_len;
+};
+
+static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
+				  struct crypto_async_request *async,
+				  struct safexcel_command_desc *cdesc,
+				  u32 length)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_token *token;
+	unsigned offset = 0;
+
+	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
+		offset = AES_BLOCK_SIZE / sizeof(u32);
+		memcpy(cdesc->control_data.token, req->iv, AES_BLOCK_SIZE);
+
+		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
+	}
+
+	token = (struct safexcel_token *)(cdesc->control_data.token + offset);
+
+	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token[0].packet_length = length;
+	token[0].stat = EIP197_TOKEN_STAT_LAST_PACKET;
+	token[0].instructions = EIP197_TOKEN_INS_LAST |
+				EIP197_TOKEN_INS_TYPE_CRYTO |
+				EIP197_TOKEN_INS_TYPE_OUTPUT;
+}
+
+static int safexcel_aes_setkey(struct crypto_skcipher *ctfm, const u8 *key,
+			       unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_ctx aes;
+	int ret, i;
+
+	ret = crypto_aes_expand_key(&aes, key, len);
+	if (ret) {
+		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
+	for (i = 0; i < len / sizeof(u32); i++) {
+		if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+			ctx->base.needs_inv = true;
+			break;
+		}
+	}
+
+	for (i = 0; i < len / sizeof(u32); i++)
+		ctx->key[i] = cpu_to_le32(aes.key_enc[i]);
+
+	ctx->key_len = len;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
+
+static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
+				    struct safexcel_command_desc *cdesc)
+{
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ctrl_size;
+
+	if (ctx->direction == SAFEXCEL_ENCRYPT)
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_OUT;
+	else
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_IN;
+
+	cdesc->control_data.control0 |= CONTEXT_CONTROL_KEY_EN;
+	cdesc->control_data.control1 |= ctx->mode;
+
+	switch (ctx->key_len) {
+	case AES_KEYSIZE_128:
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES128;
+		ctrl_size = 4;
+		break;
+	case AES_KEYSIZE_192:
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES192;
+		ctrl_size = 6;
+		break;
+	case AES_KEYSIZE_256:
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES256;
+		ctrl_size = 8;
+		break;
+	default:
+		dev_err(priv->dev, "aes keysize not supported: %u\n",
+			ctx->key_len);
+		return -EINVAL;
+	}
+	cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(ctrl_size);
+
+	return 0;
+}
+
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_result_desc *rdesc;
+	int ndesc = 0;
+
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	do {
+		rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+		if (IS_ERR(rdesc)) {
+			dev_err(priv->dev,
+				"cipher: result: could not retrieve the result descriptor\n");
+			*ret = PTR_ERR(rdesc);
+			break;
+		}
+
+		if (rdesc->result_data.error_code) {
+			dev_err(priv->dev,
+				"cipher: result: result descriptor error (%d)\n",
+				rdesc->result_data.error_code);
+			*ret = -EIO;
+		}
+
+		ndesc++;
+	} while (!rdesc->last_seg);
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (req->src == req->dst) {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_TO_DEVICE);
+		dma_unmap_sg(priv->dev, req->dst,
+			     sg_nents_for_len(req->dst, req->cryptlen),
+			     DMA_FROM_DEVICE);
+	}
+
+	*should_complete = true;
+
+	return ndesc;
+}
+
+static int safexcel_aes_send(struct crypto_async_request *async,
+			     int ring, struct safexcel_request *request,
+			     int *commands, int *results)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct safexcel_command_desc *cdesc;
+	struct safexcel_result_desc *rdesc;
+	struct scatterlist *sg;
+	int nr_src, nr_dst, n_cdesc = 0, n_rdesc = 0, queued = req->cryptlen;
+	int i, ret = 0;
+
+	if (req->src == req->dst) {
+		nr_src = dma_map_sg(priv->dev, req->src,
+				    sg_nents_for_len(req->src, req->cryptlen),
+				    DMA_BIDIRECTIONAL);
+		nr_dst = nr_src;
+		if (!nr_src)
+			return -EINVAL;
+	} else {
+		nr_src = dma_map_sg(priv->dev, req->src,
+				    sg_nents_for_len(req->src, req->cryptlen),
+				    DMA_TO_DEVICE);
+		if (!nr_src)
+			return -EINVAL;
+
+		nr_dst = dma_map_sg(priv->dev, req->dst,
+				    sg_nents_for_len(req->dst, req->cryptlen),
+				    DMA_FROM_DEVICE);
+		if (!nr_dst) {
+			dma_unmap_sg(priv->dev, req->src,
+				     sg_nents_for_len(req->src, req->cryptlen),
+				     DMA_TO_DEVICE);
+			return -EINVAL;
+		}
+	}
+
+	memcpy(ctx->base.ctxr->data, ctx->key, ctx->key_len);
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	/* command descriptors */
+	for_each_sg(req->src, sg, nr_src, i) {
+		int len = sg_dma_len(sg);
+
+		/* Do not overflow the request */
+		if (queued - len < 0)
+			len = queued;
+
+		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc, !(queued - len),
+					   sg_dma_address(sg), len, req->cryptlen,
+					   ctx->base.ctxr_dma);
+		if (IS_ERR(cdesc)) {
+			/* No space left in the command descriptor ring */
+			ret = PTR_ERR(cdesc);
+			goto cdesc_rollback;
+		}
+		n_cdesc++;
+
+		if (n_cdesc == 1) {
+			safexcel_context_control(ctx, cdesc);
+			safexcel_cipher_token(ctx, async, cdesc, req->cryptlen);
+		}
+
+		queued -= len;
+		if (!queued)
+			break;
+	}
+
+	/* result descriptors */
+	for_each_sg(req->dst, sg, nr_dst, i) {
+		bool first = !i, last = (i == nr_dst - 1);
+		u32 len = sg_dma_len(sg);
+
+		rdesc = safexcel_add_rdesc(priv, ring, first, last,
+					   sg_dma_address(sg), len);
+		if (IS_ERR(rdesc)) {
+			/* No space left in the result descriptor ring */
+			ret = PTR_ERR(rdesc);
+			goto rdesc_rollback;
+		}
+		n_rdesc++;
+	}
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	request->req = &req->base;
+	ctx->base.handle_result = safexcel_handle_result;
+
+	*commands = n_cdesc;
+	*results = n_rdesc;
+	return 0;
+
+rdesc_rollback:
+	for (i = 0; i < n_rdesc; i++)
+		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].rdr);
+cdesc_rollback:
+	for (i = 0; i < n_cdesc; i++)
+		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (req->src == req->dst) {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_BIDIRECTIONAL);
+	} else {
+		dma_unmap_sg(priv->dev, req->src,
+			     sg_nents_for_len(req->src, req->cryptlen),
+			     DMA_TO_DEVICE);
+		dma_unmap_sg(priv->dev, req->dst,
+			     sg_nents_for_len(req->dst, req->cryptlen),
+			     DMA_FROM_DEVICE);
+	}
+
+	return ret;
+}
+
+static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
+				      int ring,
+				      struct crypto_async_request *async,
+				      bool *should_complete, int *ret)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_result_desc *rdesc;
+	int ndesc = 0, enq_ret;
+
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	do {
+		rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+		if (IS_ERR(rdesc)) {
+			dev_err(priv->dev,
+				"cipher: invalidate: could not retrieve the result descriptor\n");
+			*ret = PTR_ERR(rdesc);
+			break;
+		}
+
+		if (rdesc->result_data.error_code) {
+			dev_err(priv->dev, "cipher: invalidate: result descriptor error (%d)\n",
+				rdesc->result_data.error_code);
+			*ret = -EIO;
+		}
+
+		ndesc++;
+	} while (!rdesc->last_seg);
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (ctx->base.exit_inv) {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+
+		*should_complete = true;
+
+		return ndesc;
+	}
+
+	ring = safexcel_select_ring(priv);
+	ctx->base.ring = ring;
+	ctx->base.needs_inv = false;
+	ctx->base.send = safexcel_aes_send;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (enq_ret != -EINPROGRESS)
+		*ret = enq_ret;
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	*should_complete = false;
+
+	return ndesc;
+}
+
+static int safexcel_cipher_send_inv(struct crypto_async_request *async,
+				    int ring, struct safexcel_request *request,
+				    int *commands, int *results)
+{
+	struct skcipher_request *req = skcipher_request_cast(async);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret;
+
+	ctx->base.handle_result = safexcel_handle_inv_result;
+
+	ret = safexcel_invalidate_cache(async, &ctx->base, priv,
+					ctx->base.ctxr_dma, ring, request);
+	if (unlikely(ret))
+		return ret;
+
+	*commands = 1;
+	*results = 1;
+
+	return 0;
+}
+
+static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct skcipher_request req;
+	struct safexcel_inv_result result = { 0 };
+	int ring = ctx->base.ring;
+
+	memset(&req, 0, sizeof(struct skcipher_request));
+
+	/* create invalidation request */
+	init_completion(&result.completion);
+	skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					safexcel_inv_complete, &result);
+
+	skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm));
+	ctx = crypto_tfm_ctx(req.base.tfm);
+	ctx->base.exit_inv = true;
+	ctx->base.send = safexcel_cipher_send_inv;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	wait_for_completion_interruptible(&result.completion);
+
+	if (result.error) {
+		dev_warn(priv->dev,
+			"cipher: sync: invalidate: completion error %d\n",
+			 result.error);
+		return result.error;
+	}
+
+	return 0;
+}
+
+static int safexcel_aes(struct skcipher_request *req,
+			enum safexcel_cipher_direction dir, u32 mode)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret, ring;
+
+	ctx->direction = dir;
+	ctx->mode = mode;
+
+	if (ctx->base.ctxr) {
+		if (ctx->base.needs_inv)
+			ctx->base.send = safexcel_cipher_send_inv;
+	} else {
+		ctx->base.ring = safexcel_select_ring(priv);
+		ctx->base.send = safexcel_aes_send;
+
+		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
+						 EIP197_GFP_FLAGS(req->base),
+						 &ctx->base.ctxr_dma);
+		if (!ctx->base.ctxr)
+			return -ENOMEM;
+	}
+
+	ring = ctx->base.ring;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	ret = crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	return ret;
+}
+
+static int safexcel_ecb_aes_encrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_ENCRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_ECB);
+}
+
+static int safexcel_ecb_aes_decrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_DECRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_ECB);
+}
+
+static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_alg_template *tmpl =
+		container_of(tfm->__crt_alg, struct safexcel_alg_template,
+			     alg.skcipher.base);
+
+	ctx->priv = tmpl->priv;
+
+	return 0;
+}
+
+static void safexcel_skcipher_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret;
+
+	memzero_explicit(ctx->key, 8 * sizeof(u32));
+
+	/* context not allocated, skip invalidation */
+	if (!ctx->base.ctxr)
+		return;
+
+	memzero_explicit(ctx->base.ctxr->data, 8 * sizeof(u32));
+
+	ret = safexcel_cipher_exit_inv(tfm);
+	if (ret)
+		dev_warn(priv->dev, "cipher: invalidation error %d\n", ret);
+}
+
+struct safexcel_alg_template safexcel_alg_ecb_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.alg.skcipher = {
+		.setkey = safexcel_aes_setkey,
+		.encrypt = safexcel_ecb_aes_encrypt,
+		.decrypt = safexcel_ecb_aes_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "safexcel-ecb-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_cbc_aes_encrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_ENCRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+}
+
+static int safexcel_cbc_aes_decrypt(struct skcipher_request *req)
+{
+	return safexcel_aes(req, SAFEXCEL_DECRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+}
+
+struct safexcel_alg_template safexcel_alg_cbc_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.alg.skcipher = {
+		.setkey = safexcel_aes_setkey,
+		.encrypt = safexcel_cbc_aes_encrypt,
+		.decrypt = safexcel_cbc_aes_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "safexcel-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
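
For orientation, below is a minimal sketch (not part of the patch) of how a kernel caller would exercise the "cbc(aes)" skcipher registered above through the generic crypto API once this driver is loaded; the demo_* names, the completion-based wait and the in-place buffer handling are illustrative assumptions, not driver code. len is assumed to be a multiple of AES_BLOCK_SIZE.

#include <crypto/skcipher.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

struct demo_result {
	struct completion completion;
	int err;
};

static void demo_complete(struct crypto_async_request *req, int err)
{
	struct demo_result *res = req->data;

	if (err == -EINPROGRESS)
		return;
	res->err = err;
	complete(&res->completion);
}

/* Encrypt len bytes in place with cbc(aes), waiting for the async engine. */
static int demo_cbc_aes_encrypt(u8 *buf, unsigned int len,
				const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct demo_result res;
	struct scatterlist sg;
	int ret;

	tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto free_tfm;
	}

	ret = crypto_skcipher_setkey(tfm, key, keylen);
	if (ret)
		goto free_req;

	init_completion(&res.completion);
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      demo_complete, &res);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	ret = crypto_skcipher_encrypt(req);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		wait_for_completion(&res.completion);
		ret = res.err;
	}

free_req:
	skcipher_request_free(req);
free_tfm:
	crypto_free_skcipher(tfm);
	return ret;
}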

+ 1052 - 0
drivers/crypto/inside-secure/safexcel_hash.c

@@ -0,0 +1,1052 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <crypto/hmac.h>
+#include <crypto/sha.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+
+
+#include "safexcel.h"
+
+struct safexcel_ahash_ctx {
+	struct safexcel_context base;
+	struct safexcel_crypto_priv *priv;
+
+	u32 alg;
+	u32 digest;
+
+	u32 ipad[SHA1_DIGEST_SIZE / sizeof(u32)];
+	u32 opad[SHA1_DIGEST_SIZE / sizeof(u32)];
+};
+
+struct safexcel_ahash_req {
+	bool last_req;
+	bool finish;
+	bool hmac;
+
+	u8 state_sz;    /* expected state size, only set once */
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+
+	u64 len;
+	u64 processed;
+
+	u8 cache[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
+	u8 cache_next[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
+};
+
+struct safexcel_ahash_export_state {
+	u64 len;
+	u64 processed;
+
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u8 cache[SHA256_BLOCK_SIZE];
+};
+
+static void safexcel_hash_token(struct safexcel_command_desc *cdesc,
+				u32 input_length, u32 result_length)
+{
+	struct safexcel_token *token =
+		(struct safexcel_token *)cdesc->control_data.token;
+
+	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token[0].packet_length = input_length;
+	token[0].stat = EIP197_TOKEN_STAT_LAST_HASH;
+	token[0].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+
+	token[1].opcode = EIP197_TOKEN_OPCODE_INSERT;
+	token[1].packet_length = result_length;
+	token[1].stat = EIP197_TOKEN_STAT_LAST_HASH |
+			EIP197_TOKEN_STAT_LAST_PACKET;
+	token[1].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+				EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+}
+
+static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
+				     struct safexcel_ahash_req *req,
+				     struct safexcel_command_desc *cdesc,
+				     unsigned int digestsize,
+				     unsigned int blocksize)
+{
+	int i;
+
+	cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_HASH_OUT;
+	cdesc->control_data.control0 |= ctx->alg;
+	cdesc->control_data.control0 |= ctx->digest;
+
+	if (ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) {
+		if (req->processed) {
+			if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA1)
+				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(6);
+			else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA224 ||
+				 ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA256)
+				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(9);
+
+			cdesc->control_data.control1 |= CONTEXT_CONTROL_DIGEST_CNT;
+		} else {
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_RESTART_HASH;
+		}
+
+		if (!req->finish)
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_NO_FINISH_HASH;
+
+		/*
+		 * Copy the input digest if needed, and set up the context
+		 * fields. Do this now as we need it to set up the first command
+		 * descriptor.
+		 */
+		if (req->processed) {
+			for (i = 0; i < digestsize / sizeof(u32); i++)
+				ctx->base.ctxr->data[i] = cpu_to_le32(req->state[i]);
+
+			if (req->finish)
+				ctx->base.ctxr->data[i] = cpu_to_le32(req->processed / blocksize);
+		}
+	} else if (ctx->digest == CONTEXT_CONTROL_DIGEST_HMAC) {
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(10);
+
+		memcpy(ctx->base.ctxr->data, ctx->ipad, digestsize);
+		memcpy(ctx->base.ctxr->data + digestsize / sizeof(u32),
+		       ctx->opad, digestsize);
+	}
+}
+
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+				  struct crypto_async_request *async,
+				  bool *should_complete, int *ret)
+{
+	struct safexcel_result_desc *rdesc;
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
+	int cache_len, result_sz = sreq->state_sz;
+
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+	if (IS_ERR(rdesc)) {
+		dev_err(priv->dev,
+			"hash: result: could not retrieve the result descriptor\n");
+		*ret = PTR_ERR(rdesc);
+	} else if (rdesc->result_data.error_code) {
+		dev_err(priv->dev,
+			"hash: result: result descriptor error (%d)\n",
+			rdesc->result_data.error_code);
+		*ret = -EINVAL;
+	}
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (sreq->finish)
+		result_sz = crypto_ahash_digestsize(ahash);
+	memcpy(sreq->state, areq->result, result_sz);
+
+	dma_unmap_sg(priv->dev, areq->src,
+		     sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
+
+	safexcel_free_context(priv, async, sreq->state_sz);
+
+	cache_len = sreq->len - sreq->processed;
+	if (cache_len)
+		memcpy(sreq->cache, sreq->cache_next, cache_len);
+
+	*should_complete = true;
+
+	return 1;
+}
+
+static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
+			       struct safexcel_request *request, int *commands,
+			       int *results)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct safexcel_command_desc *cdesc, *first_cdesc = NULL;
+	struct safexcel_result_desc *rdesc;
+	struct scatterlist *sg;
+	int i, nents, queued, len, cache_len, extra, n_cdesc = 0, ret = 0;
+
+	queued = len = req->len - req->processed;
+	if (queued < crypto_ahash_blocksize(ahash))
+		cache_len = queued;
+	else
+		cache_len = queued - areq->nbytes;
+
+	/*
+	 * If this is not the last request and the queued data does not fit
+	 * into full blocks, cache it for the next send() call.
+	 */
+	extra = queued & (crypto_ahash_blocksize(ahash) - 1);
+	if (!req->last_req && extra) {
+		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+				   req->cache_next, extra, areq->nbytes - extra);
+
+		queued -= extra;
+		len -= extra;
+	}
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+
+	/* Add a command descriptor for the cached data, if any */
+	if (cache_len) {
+		ctx->base.cache = kzalloc(cache_len, EIP197_GFP_FLAGS(*async));
+		if (!ctx->base.cache) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+		memcpy(ctx->base.cache, req->cache, cache_len);
+		ctx->base.cache_dma = dma_map_single(priv->dev, ctx->base.cache,
+						     cache_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->dev, ctx->base.cache_dma)) {
+			ret = -EINVAL;
+			goto free_cache;
+		}
+
+		ctx->base.cache_sz = cache_len;
+		first_cdesc = safexcel_add_cdesc(priv, ring, 1,
+						 (cache_len == len),
+						 ctx->base.cache_dma,
+						 cache_len, len,
+						 ctx->base.ctxr_dma);
+		if (IS_ERR(first_cdesc)) {
+			ret = PTR_ERR(first_cdesc);
+			goto unmap_cache;
+		}
+		n_cdesc++;
+
+		queued -= cache_len;
+		if (!queued)
+			goto send_command;
+	}
+
+	/* Now handle the current ahash request buffer(s) */
+	nents = dma_map_sg(priv->dev, areq->src,
+		       sg_nents_for_len(areq->src, areq->nbytes),
+		       DMA_TO_DEVICE);
+	if (!nents) {
+		ret = -ENOMEM;
+		goto cdesc_rollback;
+	}
+
+	for_each_sg(areq->src, sg, nents, i) {
+		int sglen = sg_dma_len(sg);
+
+		/* Do not overflow the request */
+		if (queued - sglen < 0)
+			sglen = queued;
+
+		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc,
+					   !(queued - sglen), sg_dma_address(sg),
+					   sglen, len, ctx->base.ctxr_dma);
+		if (IS_ERR(cdesc)) {
+			ret = PTR_ERR(cdesc);
+			goto cdesc_rollback;
+		}
+		n_cdesc++;
+
+		if (n_cdesc == 1)
+			first_cdesc = cdesc;
+
+		queued -= sglen;
+		if (!queued)
+			break;
+	}
+
+send_command:
+	/* Setup the context options */
+	safexcel_context_control(ctx, req, first_cdesc, req->state_sz,
+				 crypto_ahash_blocksize(ahash));
+
+	/* Add the token */
+	safexcel_hash_token(first_cdesc, len, req->state_sz);
+
+	ctx->base.result_dma = dma_map_single(priv->dev, areq->result,
+					      req->state_sz, DMA_FROM_DEVICE);
+	if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
+		ret = -EINVAL;
+		goto cdesc_rollback;
+	}
+
+	/* Add a result descriptor */
+	rdesc = safexcel_add_rdesc(priv, ring, 1, 1, ctx->base.result_dma,
+				   req->state_sz);
+	if (IS_ERR(rdesc)) {
+		ret = PTR_ERR(rdesc);
+		goto cdesc_rollback;
+	}
+
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	req->processed += len;
+	request->req = &areq->base;
+	ctx->base.handle_result = safexcel_handle_result;
+
+	*commands = n_cdesc;
+	*results = 1;
+	return 0;
+
+cdesc_rollback:
+	for (i = 0; i < n_cdesc; i++)
+		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
+unmap_cache:
+	if (ctx->base.cache_dma) {
+		dma_unmap_single(priv->dev, ctx->base.cache_dma,
+				 ctx->base.cache_sz, DMA_TO_DEVICE);
+		ctx->base.cache_sz = 0;
+	}
+free_cache:
+	if (ctx->base.cache) {
+		kfree(ctx->base.cache);
+		ctx->base.cache = NULL;
+	}
+
+unlock:
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+	return ret;
+}
+
+static inline bool safexcel_ahash_needs_inv_get(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	unsigned int state_w_sz = req->state_sz / sizeof(u32);
+	int i;
+
+	for (i = 0; i < state_w_sz; i++)
+		if (ctx->base.ctxr->data[i] != cpu_to_le32(req->state[i]))
+			return true;
+
+	if (ctx->base.ctxr->data[state_w_sz] !=
+	    cpu_to_le32(req->processed / crypto_ahash_blocksize(ahash)))
+		return true;
+
+	return false;
+}
+
+static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
+				      int ring,
+				      struct crypto_async_request *async,
+				      bool *should_complete, int *ret)
+{
+	struct safexcel_result_desc *rdesc;
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int enq_ret;
+
+	*ret = 0;
+
+	spin_lock_bh(&priv->ring[ring].egress_lock);
+	rdesc = safexcel_ring_next_rptr(priv, &priv->ring[ring].rdr);
+	if (IS_ERR(rdesc)) {
+		dev_err(priv->dev,
+			"hash: invalidate: could not retrieve the result descriptor\n");
+		*ret = PTR_ERR(rdesc);
+	} else if (rdesc->result_data.error_code) {
+		dev_err(priv->dev,
+			"hash: invalidate: result descriptor error (%d)\n",
+			rdesc->result_data.error_code);
+		*ret = -EINVAL;
+	}
+
+	safexcel_complete(priv, ring);
+	spin_unlock_bh(&priv->ring[ring].egress_lock);
+
+	if (ctx->base.exit_inv) {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+
+		*should_complete = true;
+		return 1;
+	}
+
+	ring = safexcel_select_ring(priv);
+	ctx->base.ring = ring;
+	ctx->base.needs_inv = false;
+	ctx->base.send = safexcel_ahash_send;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (enq_ret != -EINPROGRESS)
+		*ret = enq_ret;
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	*should_complete = false;
+
+	return 1;
+}
+
+static int safexcel_ahash_send_inv(struct crypto_async_request *async,
+				   int ring, struct safexcel_request *request,
+				   int *commands, int *results)
+{
+	struct ahash_request *areq = ahash_request_cast(async);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	int ret;
+
+	ctx->base.handle_result = safexcel_handle_inv_result;
+	ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
+					ctx->base.ctxr_dma, ring, request);
+	if (unlikely(ret))
+		return ret;
+
+	*commands = 1;
+	*results = 1;
+
+	return 0;
+}
+
+static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct ahash_request req;
+	struct safexcel_inv_result result = { 0 };
+	int ring = ctx->base.ring;
+
+	memset(&req, 0, sizeof(struct ahash_request));
+
+	/* create invalidation request */
+	init_completion(&result.completion);
+	ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   safexcel_inv_complete, &result);
+
+	ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm));
+	ctx = crypto_tfm_ctx(req.base.tfm);
+	ctx->base.exit_inv = true;
+	ctx->base.send = safexcel_ahash_send_inv;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	wait_for_completion_interruptible(&result.completion);
+
+	if (result.error) {
+		dev_warn(priv->dev, "hash: completion error (%d)\n",
+			 result.error);
+		return result.error;
+	}
+
+	return 0;
+}
+
+static int safexcel_ahash_cache(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	int queued, cache_len;
+
+	cache_len = req->len - areq->nbytes - req->processed;
+	queued = req->len - req->processed;
+
+	/*
+	 * In case there aren't enough bytes to proceed (less than a
+	 * block size), cache the data until we have enough.
+	 */
+	if (cache_len + areq->nbytes <= crypto_ahash_blocksize(ahash)) {
+		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+				   req->cache + cache_len,
+				   areq->nbytes, 0);
+		return areq->nbytes;
+	}
+
+	/* We couldn't cache all the data */
+	return -E2BIG;
+}
+
+static int safexcel_ahash_enqueue(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret, ring;
+
+	ctx->base.send = safexcel_ahash_send;
+
+	if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
+		ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
+
+	if (ctx->base.ctxr) {
+		if (ctx->base.needs_inv)
+			ctx->base.send = safexcel_ahash_send_inv;
+	} else {
+		ctx->base.ring = safexcel_select_ring(priv);
+		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
+						 EIP197_GFP_FLAGS(areq->base),
+						 &ctx->base.ctxr_dma);
+		if (!ctx->base.ctxr)
+			return -ENOMEM;
+	}
+
+	ring = ctx->base.ring;
+
+	spin_lock_bh(&priv->ring[ring].queue_lock);
+	ret = crypto_enqueue_request(&priv->ring[ring].queue, &areq->base);
+	spin_unlock_bh(&priv->ring[ring].queue_lock);
+
+	if (!priv->ring[ring].need_dequeue)
+		safexcel_dequeue(priv, ring);
+
+	return ret;
+}
+
+static int safexcel_ahash_update(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+
+	/* If the request is 0 length, do nothing */
+	if (!areq->nbytes)
+		return 0;
+
+	req->len += areq->nbytes;
+
+	safexcel_ahash_cache(areq);
+
+	/*
+	 * We're not doing partial updates when performing an HMAC request.
+	 * Everything will be handled by the final() call.
+	 */
+	if (ctx->digest == CONTEXT_CONTROL_DIGEST_HMAC)
+		return 0;
+
+	if (req->hmac)
+		return safexcel_ahash_enqueue(areq);
+
+	if (!req->last_req &&
+	    req->len - req->processed > crypto_ahash_blocksize(ahash))
+		return safexcel_ahash_enqueue(areq);
+
+	return 0;
+}
+
+static int safexcel_ahash_final(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+
+	req->last_req = true;
+	req->finish = true;
+
+	/* If we have an overall 0 length request */
+	if (!(req->len + areq->nbytes)) {
+		if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA1)
+			memcpy(areq->result, sha1_zero_message_hash,
+			       SHA1_DIGEST_SIZE);
+		else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA224)
+			memcpy(areq->result, sha224_zero_message_hash,
+			       SHA224_DIGEST_SIZE);
+		else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA256)
+			memcpy(areq->result, sha256_zero_message_hash,
+			       SHA256_DIGEST_SIZE);
+
+		return 0;
+	}
+
+	return safexcel_ahash_enqueue(areq);
+}
+
+static int safexcel_ahash_finup(struct ahash_request *areq)
+{
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	req->last_req = true;
+	req->finish = true;
+
+	safexcel_ahash_update(areq);
+	return safexcel_ahash_final(areq);
+}
+
+static int safexcel_ahash_export(struct ahash_request *areq, void *out)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_export_state *export = out;
+
+	export->len = req->len;
+	export->processed = req->processed;
+
+	memcpy(export->state, req->state, req->state_sz);
+	memset(export->cache, 0, crypto_ahash_blocksize(ahash));
+	memcpy(export->cache, req->cache, crypto_ahash_blocksize(ahash));
+
+	return 0;
+}
+
+static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	const struct safexcel_ahash_export_state *export = in;
+	int ret;
+
+	ret = crypto_ahash_init(areq);
+	if (ret)
+		return ret;
+
+	req->len = export->len;
+	req->processed = export->processed;
+
+	memcpy(req->cache, export->cache, crypto_ahash_blocksize(ahash));
+	memcpy(req->state, export->state, req->state_sz);
+
+	return 0;
+}
+
+static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_alg_template *tmpl =
+		container_of(__crypto_ahash_alg(tfm->__crt_alg),
+			     struct safexcel_alg_template, alg.ahash);
+
+	ctx->priv = tmpl->priv;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct safexcel_ahash_req));
+	return 0;
+}
+
+static int safexcel_sha1_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	req->state[0] = SHA1_H0;
+	req->state[1] = SHA1_H1;
+	req->state[2] = SHA1_H2;
+	req->state[3] = SHA1_H3;
+	req->state[4] = SHA1_H4;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
+	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA1_DIGEST_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sha1_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sha1_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+static void safexcel_ahash_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret;
+
+	/* context not allocated, skip invalidation */
+	if (!ctx->base.ctxr)
+		return;
+
+	ret = safexcel_ahash_exit_inv(tfm);
+	if (ret)
+		dev_warn(priv->dev, "hash: invalidation error %d\n", ret);
+}
+
+struct safexcel_alg_template safexcel_alg_sha1 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_sha1_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sha1_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha1",
+				.cra_driver_name = "safexcel-sha1",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_hmac_sha1_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+
+	safexcel_sha1_init(areq);
+	ctx->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	return 0;
+}
+
+static int safexcel_hmac_sha1_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_hmac_sha1_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_ahash_result {
+	struct completion completion;
+	int error;
+};
+
+static void safexcel_ahash_complete(struct crypto_async_request *req, int error)
+{
+	struct safexcel_ahash_result *result = req->data;
+
+	if (error == -EINPROGRESS)
+		return;
+
+	result->error = error;
+	complete(&result->completion);
+}
+
+static int safexcel_hmac_init_pad(struct ahash_request *areq,
+				  unsigned int blocksize, const u8 *key,
+				  unsigned int keylen, u8 *ipad, u8 *opad)
+{
+	struct safexcel_ahash_result result;
+	struct scatterlist sg;
+	int ret, i;
+	u8 *keydup;
+
+	if (keylen <= blocksize) {
+		memcpy(ipad, key, keylen);
+	} else {
+		keydup = kmemdup(key, keylen, GFP_KERNEL);
+		if (!keydup)
+			return -ENOMEM;
+
+		ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					   safexcel_ahash_complete, &result);
+		sg_init_one(&sg, keydup, keylen);
+		ahash_request_set_crypt(areq, &sg, ipad, keylen);
+		init_completion(&result.completion);
+
+		ret = crypto_ahash_digest(areq);
+		if (ret == -EINPROGRESS) {
+			wait_for_completion_interruptible(&result.completion);
+			ret = result.error;
+		}
+
+		/* Avoid leaking */
+		memzero_explicit(keydup, keylen);
+		kfree(keydup);
+
+		if (ret)
+			return ret;
+
+		keylen = crypto_ahash_digestsize(crypto_ahash_reqtfm(areq));
+	}
+
+	memset(ipad + keylen, 0, blocksize - keylen);
+	memcpy(opad, ipad, blocksize);
+
+	for (i = 0; i < blocksize; i++) {
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
+	}
+
+	return 0;
+}
+
+static int safexcel_hmac_init_iv(struct ahash_request *areq,
+				 unsigned int blocksize, u8 *pad, void *state)
+{
+	struct safexcel_ahash_result result;
+	struct safexcel_ahash_req *req;
+	struct scatterlist sg;
+	int ret;
+
+	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   safexcel_ahash_complete, &result);
+	sg_init_one(&sg, pad, blocksize);
+	ahash_request_set_crypt(areq, &sg, pad, blocksize);
+	init_completion(&result.completion);
+
+	ret = crypto_ahash_init(areq);
+	if (ret)
+		return ret;
+
+	req = ahash_request_ctx(areq);
+	req->hmac = true;
+	req->last_req = true;
+
+	ret = crypto_ahash_update(areq);
+	if (ret && ret != -EINPROGRESS)
+		return ret;
+
+	wait_for_completion_interruptible(&result.completion);
+	if (result.error)
+		return result.error;
+
+	return crypto_ahash_export(areq, state);
+}
+
+static int safexcel_hmac_setkey(const char *alg, const u8 *key,
+				unsigned int keylen, void *istate, void *ostate)
+{
+	struct ahash_request *areq;
+	struct crypto_ahash *tfm;
+	unsigned int blocksize;
+	u8 *ipad, *opad;
+	int ret;
+
+	tfm = crypto_alloc_ahash(alg, CRYPTO_ALG_TYPE_AHASH,
+				 CRYPTO_ALG_TYPE_AHASH_MASK);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	areq = ahash_request_alloc(tfm, GFP_KERNEL);
+	if (!areq) {
+		ret = -ENOMEM;
+		goto free_ahash;
+	}
+
+	crypto_ahash_clear_flags(tfm, ~0);
+	blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+
+	ipad = kzalloc(2 * blocksize, GFP_KERNEL);
+	if (!ipad) {
+		ret = -ENOMEM;
+		goto free_request;
+	}
+
+	opad = ipad + blocksize;
+
+	ret = safexcel_hmac_init_pad(areq, blocksize, key, keylen, ipad, opad);
+	if (ret)
+		goto free_ipad;
+
+	ret = safexcel_hmac_init_iv(areq, blocksize, ipad, istate);
+	if (ret)
+		goto free_ipad;
+
+	ret = safexcel_hmac_init_iv(areq, blocksize, opad, ostate);
+
+free_ipad:
+	kfree(ipad);
+free_request:
+	ahash_request_free(areq);
+free_ahash:
+	crypto_free_ahash(tfm);
+
+	return ret;
+}
+
+static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key,
+				     unsigned int keylen)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct safexcel_ahash_export_state istate, ostate;
+	int ret, i;
+
+	ret = safexcel_hmac_setkey("safexcel-sha1", key, keylen, &istate, &ostate);
+	if (ret)
+		return ret;
+
+	memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE);
+	memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE);
+
+	for (i = 0; i < ARRAY_SIZE(istate.state); i++) {
+		if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
+		    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
+			ctx->base.needs_inv = true;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_hmac_sha1 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_hmac_sha1_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_hmac_sha1_digest,
+		.setkey = safexcel_hmac_sha1_setkey,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "hmac(sha1)",
+				.cra_driver_name = "safexcel-hmac-sha1",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha256_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	req->state[0] = SHA256_H0;
+	req->state[1] = SHA256_H1;
+	req->state[2] = SHA256_H2;
+	req->state[3] = SHA256_H3;
+	req->state[4] = SHA256_H4;
+	req->state[5] = SHA256_H5;
+	req->state[6] = SHA256_H6;
+	req->state[7] = SHA256_H7;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
+	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA256_DIGEST_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sha256_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sha256_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_sha256 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_sha256_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sha256_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA256_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha256",
+				.cra_driver_name = "safexcel-sha256",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
+
+static int safexcel_sha224_init(struct ahash_request *areq)
+{
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
+	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+
+	memset(req, 0, sizeof(*req));
+
+	req->state[0] = SHA224_H0;
+	req->state[1] = SHA224_H1;
+	req->state[2] = SHA224_H2;
+	req->state[3] = SHA224_H3;
+	req->state[4] = SHA224_H4;
+	req->state[5] = SHA224_H5;
+	req->state[6] = SHA224_H6;
+	req->state[7] = SHA224_H7;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
+	ctx->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA256_DIGEST_SIZE;
+
+	return 0;
+}
+
+static int safexcel_sha224_digest(struct ahash_request *areq)
+{
+	int ret = safexcel_sha224_init(areq);
+
+	if (ret)
+		return ret;
+
+	return safexcel_ahash_finup(areq);
+}
+
+struct safexcel_alg_template safexcel_alg_sha224 = {
+	.type = SAFEXCEL_ALG_TYPE_AHASH,
+	.alg.ahash = {
+		.init = safexcel_sha224_init,
+		.update = safexcel_ahash_update,
+		.final = safexcel_ahash_final,
+		.finup = safexcel_ahash_finup,
+		.digest = safexcel_sha224_digest,
+		.export = safexcel_ahash_export,
+		.import = safexcel_ahash_import,
+		.halg = {
+			.digestsize = SHA224_DIGEST_SIZE,
+			.statesize = sizeof(struct safexcel_ahash_export_state),
+			.base = {
+				.cra_name = "sha224",
+				.cra_driver_name = "safexcel-sha224",
+				.cra_priority = 300,
+				.cra_flags = CRYPTO_ALG_ASYNC |
+					     CRYPTO_ALG_KERN_DRIVER_ONLY,
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct safexcel_ahash_ctx),
+				.cra_init = safexcel_ahash_cra_init,
+				.cra_exit = safexcel_ahash_cra_exit,
+				.cra_module = THIS_MODULE,
+			},
+		},
+	},
+};
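
The HMAC setkey path above follows the usual RFC 2104 preprocessing: a key no longer than one block is zero-padded and XORed with the 0x36/0x5c constants to form the inner and outer pads, whose partial digests are what ends up in the engine's context record. A hedged sketch of that pad derivation follows (the demo_ helper is an illustrative assumption; over-long keys, which the driver first hashes down to digest size, are not handled here).

#include <crypto/hmac.h>
#include <crypto/sha.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

/*
 * Illustrative only: derive the inner/outer pads for a key that already
 * fits in one SHA-1 block. safexcel_hmac_init_pad() additionally hashes
 * over-long keys down to digest size before doing the same XOR.
 */
static int demo_hmac_sha1_pads(const u8 *key, unsigned int keylen,
			       u8 ipad[SHA1_BLOCK_SIZE],
			       u8 opad[SHA1_BLOCK_SIZE])
{
	unsigned int i;

	if (keylen > SHA1_BLOCK_SIZE)
		return -EINVAL;

	memset(ipad, 0, SHA1_BLOCK_SIZE);
	memcpy(ipad, key, keylen);
	memcpy(opad, ipad, SHA1_BLOCK_SIZE);

	for (i = 0; i < SHA1_BLOCK_SIZE; i++) {
		ipad[i] ^= HMAC_IPAD_VALUE;
		opad[i] ^= HMAC_OPAD_VALUE;
	}

	return 0;
}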

+ 157 - 0
drivers/crypto/inside-secure/safexcel_ring.c

@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Antoine Tenart <antoine.tenart@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+
+#include "safexcel.h"
+
+int safexcel_init_ring_descriptors(struct safexcel_crypto_priv *priv,
+				   struct safexcel_ring *cdr,
+				   struct safexcel_ring *rdr)
+{
+	cdr->offset = sizeof(u32) * priv->config.cd_offset;
+	cdr->base = dmam_alloc_coherent(priv->dev,
+					cdr->offset * EIP197_DEFAULT_RING_SIZE,
+					&cdr->base_dma, GFP_KERNEL);
+	if (!cdr->base)
+		return -ENOMEM;
+	cdr->write = cdr->base;
+	cdr->base_end = cdr->base + cdr->offset * EIP197_DEFAULT_RING_SIZE;
+	cdr->read = cdr->base;
+
+	rdr->offset = sizeof(u32) * priv->config.rd_offset;
+	rdr->base = dmam_alloc_coherent(priv->dev,
+					rdr->offset * EIP197_DEFAULT_RING_SIZE,
+					&rdr->base_dma, GFP_KERNEL);
+	if (!rdr->base)
+		return -ENOMEM;
+	rdr->write = rdr->base;
+	rdr->base_end = rdr->base + rdr->offset * EIP197_DEFAULT_RING_SIZE;
+	rdr->read = rdr->base;
+
+	return 0;
+}
+
+inline int safexcel_select_ring(struct safexcel_crypto_priv *priv)
+{
+	return (atomic_inc_return(&priv->ring_used) % priv->config.rings);
+}
+
+static void *safexcel_ring_next_wptr(struct safexcel_crypto_priv *priv,
+				     struct safexcel_ring *ring)
+{
+	void *ptr = ring->write;
+
+	if (ring->nr == EIP197_DEFAULT_RING_SIZE - 1)
+		return ERR_PTR(-ENOMEM);
+
+	ring->write += ring->offset;
+	if (ring->write == ring->base_end)
+		ring->write = ring->base;
+
+	ring->nr++;
+	return ptr;
+}
+
+void *safexcel_ring_next_rptr(struct safexcel_crypto_priv *priv,
+			      struct safexcel_ring *ring)
+{
+	void *ptr = ring->read;
+
+	if (!ring->nr)
+		return ERR_PTR(-ENOENT);
+
+	ring->read += ring->offset;
+	if (ring->read == ring->base_end)
+		ring->read = ring->base;
+
+	ring->nr--;
+	return ptr;
+}
+
+void safexcel_ring_rollback_wptr(struct safexcel_crypto_priv *priv,
+				 struct safexcel_ring *ring)
+{
+	if (!ring->nr)
+		return;
+
+	if (ring->write == ring->base)
+		ring->write = ring->base_end - ring->offset;
+	else
+		ring->write -= ring->offset;
+
+	ring->nr--;
+}
+
+struct safexcel_command_desc *safexcel_add_cdesc(struct safexcel_crypto_priv *priv,
+						 int ring_id,
+						 bool first, bool last,
+						 dma_addr_t data, u32 data_len,
+						 u32 full_data_len,
+						 dma_addr_t context)
+{
+	struct safexcel_command_desc *cdesc;
+	int i;
+
+	cdesc = safexcel_ring_next_wptr(priv, &priv->ring[ring_id].cdr);
+	if (IS_ERR(cdesc))
+		return cdesc;
+
+	memset(cdesc, 0, sizeof(struct safexcel_command_desc));
+
+	cdesc->first_seg = first;
+	cdesc->last_seg = last;
+	cdesc->particle_size = data_len;
+	cdesc->data_lo = lower_32_bits(data);
+	cdesc->data_hi = upper_32_bits(data);
+
+	if (first && context) {
+		struct safexcel_token *token =
+			(struct safexcel_token *)cdesc->control_data.token;
+
+		cdesc->control_data.packet_length = full_data_len;
+		cdesc->control_data.options = EIP197_OPTION_MAGIC_VALUE |
+					      EIP197_OPTION_64BIT_CTX |
+					      EIP197_OPTION_CTX_CTRL_IN_CMD;
+		cdesc->control_data.context_lo =
+			(lower_32_bits(context) & GENMASK(31, 2)) >> 2;
+		cdesc->control_data.context_hi = upper_32_bits(context);
+
+		/* TODO: large xform HMAC with SHA-384/512 uses refresh = 3 */
+		cdesc->control_data.refresh = 2;
+
+		for (i = 0; i < EIP197_MAX_TOKENS; i++)
+			eip197_noop_token(&token[i]);
+	}
+
+	return cdesc;
+}
+
+struct safexcel_result_desc *safexcel_add_rdesc(struct safexcel_crypto_priv *priv,
+						int ring_id,
+						bool first, bool last,
+						dma_addr_t data, u32 len)
+{
+	struct safexcel_result_desc *rdesc;
+
+	rdesc = safexcel_ring_next_wptr(priv, &priv->ring[ring_id].rdr);
+	if (IS_ERR(rdesc))
+		return rdesc;
+
+	memset(rdesc, 0, sizeof(struct safexcel_result_desc));
+
+	rdesc->first_seg = first;
+	rdesc->last_seg = last;
+	rdesc->particle_size = len;
+	rdesc->data_lo = lower_32_bits(data);
+	rdesc->data_hi = upper_32_bits(data);
+
+	return rdesc;
+}
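
As a stand-alone illustration of the descriptor-ring bookkeeping used above (demo_ names are illustrative, not driver code): each ring is a contiguous block of fixed-stride slots with a write pointer that wraps at base_end and an element count capped at one below the ring size, mirroring safexcel_ring_next_wptr().

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>

struct demo_ring {
	void *base, *base_end;	/* backing memory and its end */
	void *write;		/* next free descriptor slot */
	unsigned int offset;	/* descriptor stride in bytes */
	unsigned int nr;	/* descriptors currently queued */
	unsigned int size;	/* total descriptor slots */
};

/* Reserve the next descriptor slot, wrapping at the end of the ring. */
static void *demo_ring_next_wptr(struct demo_ring *ring)
{
	void *ptr = ring->write;

	/* Mirror the driver's cap of size - 1 queued descriptors. */
	if (ring->nr == ring->size - 1)
		return ERR_PTR(-ENOMEM);

	ring->write += ring->offset;
	if (ring->write == ring->base_end)
		ring->write = ring->base;

	ring->nr++;
	return ptr;
}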

+ 1 - 2
drivers/crypto/ixp4xx_crypto.c

@@ -23,6 +23,7 @@
 #include <crypto/ctr.h>
 #include <crypto/des.h>
 #include <crypto/aes.h>
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
@@ -90,8 +91,6 @@
 #define CTL_FLAG_PERFORM_AEAD	0x0008
 #define CTL_FLAG_MASK		0x000f
 
-#define HMAC_IPAD_VALUE   0x36
-#define HMAC_OPAD_VALUE   0x5C
 #define HMAC_PAD_BLOCKLEN SHA1_BLOCK_SIZE
 
 #define MD5_DIGEST_SIZE   16

+ 3 - 2
drivers/crypto/marvell/hash.c

@@ -12,6 +12,7 @@
  * by the Free Software Foundation.
  */
 
+#include <crypto/hmac.h>
 #include <crypto/md5.h>
 #include <crypto/sha.h>
 
@@ -1164,8 +1165,8 @@ static int mv_cesa_ahmac_pad_init(struct ahash_request *req,
 	memcpy(opad, ipad, blocksize);
 
 	for (i = 0; i < blocksize; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return 0;

+ 1 - 10
drivers/crypto/mediatek/mtk-platform.c

@@ -504,19 +504,14 @@ static int mtk_crypto_probe(struct platform_device *pdev)
 		}
 	}
 
-	cryp->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
 	cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp");
-	if (IS_ERR(cryp->clk_ethif) || IS_ERR(cryp->clk_cryp))
+	if (IS_ERR(cryp->clk_cryp))
 		return -EPROBE_DEFER;
 
 	cryp->dev = &pdev->dev;
 	pm_runtime_enable(cryp->dev);
 	pm_runtime_get_sync(cryp->dev);
 
-	err = clk_prepare_enable(cryp->clk_ethif);
-	if (err)
-		goto err_clk_ethif;
-
 	err = clk_prepare_enable(cryp->clk_cryp);
 	if (err)
 		goto err_clk_cryp;
@@ -559,8 +554,6 @@ err_engine:
 err_resource:
 	clk_disable_unprepare(cryp->clk_cryp);
 err_clk_cryp:
-	clk_disable_unprepare(cryp->clk_ethif);
-err_clk_ethif:
 	pm_runtime_put_sync(cryp->dev);
 	pm_runtime_disable(cryp->dev);
 
@@ -576,7 +569,6 @@ static int mtk_crypto_remove(struct platform_device *pdev)
 	mtk_desc_dma_free(cryp);
 
 	clk_disable_unprepare(cryp->clk_cryp);
-	clk_disable_unprepare(cryp->clk_ethif);
 
 	pm_runtime_put_sync(cryp->dev);
 	pm_runtime_disable(cryp->dev);
@@ -596,7 +588,6 @@ static struct platform_driver mtk_crypto_driver = {
 	.remove = mtk_crypto_remove,
 	.driver = {
 		   .name = "mtk-crypto",
-		   .owner = THIS_MODULE,
 		   .of_match_table = of_crypto_id,
 	},
 };

+ 0 - 2
drivers/crypto/mediatek/mtk-platform.h

@@ -200,7 +200,6 @@ struct mtk_sha_rec {
  * struct mtk_cryp - Cryptographic device
  * @base:	pointer to mapped register I/O base
  * @dev:	pointer to device
- * @clk_ethif:	pointer to ethif clock
  * @clk_cryp:	pointer to crypto clock
  * @irq:	global system and rings IRQ
  * @ring:	pointer to descriptor rings
@@ -215,7 +214,6 @@ struct mtk_sha_rec {
 struct mtk_cryp {
 	void __iomem *base;
 	struct device *dev;
-	struct clk *clk_ethif;
 	struct clk *clk_cryp;
 	int irq[MTK_IRQ_NUM];
 

+ 3 - 2
drivers/crypto/mediatek/mtk-sha.c

@@ -12,6 +12,7 @@
  * Some ideas are from atmel-sha.c and omap-sham.c drivers.
  */
 
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include "mtk-platform.h"
 
@@ -825,8 +826,8 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 	memcpy(bctx->opad, bctx->ipad, bs);
 
 	for (i = 0; i < bs; i++) {
-		bctx->ipad[i] ^= 0x36;
-		bctx->opad[i] ^= 0x5c;
+		bctx->ipad[i] ^= HMAC_IPAD_VALUE;
+		bctx->opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return 0;

+ 3 - 2
drivers/crypto/mv_cesa.c

@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/clk.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/of.h>
@@ -822,8 +823,8 @@ static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key,
 		memcpy(opad, ipad, bs);
 
 		for (i = 0; i < bs; i++) {
-			ipad[i] ^= 0x36;
-			opad[i] ^= 0x5c;
+			ipad[i] ^= HMAC_IPAD_VALUE;
+			opad[i] ^= HMAC_OPAD_VALUE;
 		}
 
 		rc = crypto_shash_init(shash) ? :

+ 2 - 2
drivers/crypto/n2_core.c

@@ -2169,7 +2169,7 @@ static int n2_mau_remove(struct platform_device *dev)
 	return 0;
 }
 
-static struct of_device_id n2_crypto_match[] = {
+static const struct of_device_id n2_crypto_match[] = {
 	{
 		.name = "n2cp",
 		.compatible = "SUNW,n2-cwq",
@@ -2196,7 +2196,7 @@ static struct platform_driver n2_crypto_driver = {
 	.remove		=	n2_crypto_remove,
 };
 
-static struct of_device_id n2_mau_match[] = {
+static const struct of_device_id n2_mau_match[] = {
 	{
 		.name = "ncp",
 		.compatible = "SUNW,n2-mau",

+ 408 - 0
drivers/crypto/omap-aes-gcm.c

@@ -0,0 +1,408 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP AES GCM HW acceleration.
+ *
+ * Copyright (c) 2016 Texas Instruments Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/interrupt.h>
+#include <crypto/aes.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
+#include <crypto/internal/aead.h>
+
+#include "omap-crypto.h"
+#include "omap-aes.h"
+
+static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
+				     struct aead_request *req);
+
+static void omap_aes_gcm_finish_req(struct omap_aes_dev *dd, int ret)
+{
+	struct aead_request *req = dd->aead_req;
+
+	dd->flags &= ~FLAGS_BUSY;
+	dd->in_sg = NULL;
+	dd->out_sg = NULL;
+
+	req->base.complete(&req->base, ret);
+}
+
+static void omap_aes_gcm_done_task(struct omap_aes_dev *dd)
+{
+	u8 *tag;
+	int alen, clen, i, ret = 0, nsg;
+	struct omap_aes_reqctx *rctx;
+
+	alen = ALIGN(dd->assoc_len, AES_BLOCK_SIZE);
+	clen = ALIGN(dd->total, AES_BLOCK_SIZE);
+	rctx = aead_request_ctx(dd->aead_req);
+
+	nsg = !!(dd->assoc_len && dd->total);
+
+	dma_sync_sg_for_device(dd->dev, dd->out_sg, dd->out_sg_len,
+			       DMA_FROM_DEVICE);
+	dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
+	dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE);
+	omap_aes_crypt_dma_stop(dd);
+
+	omap_crypto_cleanup(dd->out_sg, dd->orig_out,
+			    dd->aead_req->assoclen, dd->total,
+			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
+
+	if (dd->flags & FLAGS_ENCRYPT)
+		scatterwalk_map_and_copy(rctx->auth_tag,
+					 dd->aead_req->dst,
+					 dd->total + dd->aead_req->assoclen,
+					 dd->authsize, 1);
+
+	omap_crypto_cleanup(&dd->in_sgl[0], NULL, 0, alen,
+			    FLAGS_ASSOC_DATA_ST_SHIFT, dd->flags);
+
+	omap_crypto_cleanup(&dd->in_sgl[nsg], NULL, 0, clen,
+			    FLAGS_IN_DATA_ST_SHIFT, dd->flags);
+
+	if (!(dd->flags & FLAGS_ENCRYPT)) {
+		tag = (u8 *)rctx->auth_tag;
+		for (i = 0; i < dd->authsize; i++) {
+			if (tag[i]) {
+				dev_err(dd->dev, "GCM decryption: Tag Message is wrong\n");
+				ret = -EBADMSG;
+			}
+		}
+	}
+
+	omap_aes_gcm_finish_req(dd, ret);
+	omap_aes_gcm_handle_queue(dd, NULL);
+}
+
+static int omap_aes_gcm_copy_buffers(struct omap_aes_dev *dd,
+				     struct aead_request *req)
+{
+	int alen, clen, cryptlen, assoclen, ret;
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int authlen = crypto_aead_authsize(aead);
+	struct scatterlist *tmp, sg_arr[2];
+	int nsg;
+	u16 flags;
+
+	assoclen = req->assoclen;
+	cryptlen = req->cryptlen;
+
+	if (dd->flags & FLAGS_RFC4106_GCM)
+		assoclen -= 8;
+
+	if (!(dd->flags & FLAGS_ENCRYPT))
+		cryptlen -= authlen;
+
+	alen = ALIGN(assoclen, AES_BLOCK_SIZE);
+	clen = ALIGN(cryptlen, AES_BLOCK_SIZE);
+
+	nsg = !!(assoclen && cryptlen);
+
+	omap_aes_clear_copy_flags(dd);
+
+	sg_init_table(dd->in_sgl, nsg + 1);
+	if (assoclen) {
+		tmp = req->src;
+		ret = omap_crypto_align_sg(&tmp, assoclen,
+					   AES_BLOCK_SIZE, dd->in_sgl,
+					   OMAP_CRYPTO_COPY_DATA |
+					   OMAP_CRYPTO_ZERO_BUF |
+					   OMAP_CRYPTO_FORCE_SINGLE_ENTRY,
+					   FLAGS_ASSOC_DATA_ST_SHIFT,
+					   &dd->flags);
+	}
+
+	if (cryptlen) {
+		tmp = scatterwalk_ffwd(sg_arr, req->src, req->assoclen);
+
+		ret = omap_crypto_align_sg(&tmp, cryptlen,
+					   AES_BLOCK_SIZE, &dd->in_sgl[nsg],
+					   OMAP_CRYPTO_COPY_DATA |
+					   OMAP_CRYPTO_ZERO_BUF |
+					   OMAP_CRYPTO_FORCE_SINGLE_ENTRY,
+					   FLAGS_IN_DATA_ST_SHIFT,
+					   &dd->flags);
+	}
+
+	dd->in_sg = dd->in_sgl;
+	dd->total = cryptlen;
+	dd->assoc_len = assoclen;
+	dd->authsize = authlen;
+
+	dd->out_sg = req->dst;
+	dd->orig_out = req->dst;
+
+	dd->out_sg = scatterwalk_ffwd(sg_arr, req->dst, assoclen);
+
+	flags = 0;
+	if (req->src == req->dst || dd->out_sg == sg_arr)
+		flags |= OMAP_CRYPTO_FORCE_COPY;
+
+	ret = omap_crypto_align_sg(&dd->out_sg, cryptlen,
+				   AES_BLOCK_SIZE, &dd->out_sgl,
+				   flags,
+				   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
+
+	dd->in_sg_len = sg_nents_for_len(dd->in_sg, alen + clen);
+	dd->out_sg_len = sg_nents_for_len(dd->out_sg, clen);
+
+	return 0;
+}
+
+static void omap_aes_gcm_complete(struct crypto_async_request *req, int err)
+{
+	struct omap_aes_gcm_result *res = req->data;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	res->err = err;
+	complete(&res->completion);
+}
+
+static int do_encrypt_iv(struct aead_request *req, u32 *tag, u32 *iv)
+{
+	struct scatterlist iv_sg, tag_sg;
+	struct skcipher_request *sk_req;
+	struct omap_aes_gcm_result result;
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	int ret = 0;
+
+	sk_req = skcipher_request_alloc(ctx->ctr, GFP_KERNEL);
+	if (!sk_req) {
+		pr_err("skcipher: Failed to allocate request\n");
+		return -1;
+	}
+
+	init_completion(&result.completion);
+
+	sg_init_one(&iv_sg, iv, AES_BLOCK_SIZE);
+	sg_init_one(&tag_sg, tag, AES_BLOCK_SIZE);
+	skcipher_request_set_callback(sk_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      omap_aes_gcm_complete, &result);
+	ret = crypto_skcipher_setkey(ctx->ctr, (u8 *)ctx->key, ctx->keylen);
+	skcipher_request_set_crypt(sk_req, &iv_sg, &tag_sg, AES_BLOCK_SIZE,
+				   NULL);
+	ret = crypto_skcipher_encrypt(sk_req);
+	switch (ret) {
+	case 0:
+		break;
+	case -EINPROGRESS:
+	case -EBUSY:
+		ret = wait_for_completion_interruptible(&result.completion);
+		if (!ret) {
+			ret = result.err;
+			if (!ret) {
+				reinit_completion(&result.completion);
+				break;
+			}
+		}
+		/* fall through */
+	default:
+		pr_err("Encryption of IV failed for GCM mode\n");
+		break;
+	}
+
+	skcipher_request_free(sk_req);
+	return ret;
+}
+
+void omap_aes_gcm_dma_out_callback(void *data)
+{
+	struct omap_aes_dev *dd = data;
+	struct omap_aes_reqctx *rctx;
+	int i, val;
+	u32 *auth_tag, tag[4];
+
+	if (!(dd->flags & FLAGS_ENCRYPT))
+		scatterwalk_map_and_copy(tag, dd->aead_req->src,
+					 dd->total + dd->aead_req->assoclen,
+					 dd->authsize, 0);
+
+	rctx = aead_request_ctx(dd->aead_req);
+	auth_tag = (u32 *)rctx->auth_tag;
+	for (i = 0; i < 4; i++) {
+		val = omap_aes_read(dd, AES_REG_TAG_N(dd, i));
+		auth_tag[i] = val ^ auth_tag[i];
+		if (!(dd->flags & FLAGS_ENCRYPT))
+			auth_tag[i] = auth_tag[i] ^ tag[i];
+	}
+
+	omap_aes_gcm_done_task(dd);
+}
+
+static int omap_aes_gcm_handle_queue(struct omap_aes_dev *dd,
+				     struct aead_request *req)
+{
+	struct omap_aes_ctx *ctx;
+	struct aead_request *backlog;
+	struct omap_aes_reqctx *rctx;
+	unsigned long flags;
+	int err, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = aead_enqueue_request(&dd->aead_queue, req);
+	if (dd->flags & FLAGS_BUSY) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = aead_get_backlog(&dd->aead_queue);
+	req = aead_dequeue_request(&dd->aead_queue);
+	if (req)
+		dd->flags |= FLAGS_BUSY;
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!req)
+		return ret;
+
+	if (backlog)
+		backlog->base.complete(&backlog->base, -EINPROGRESS);
+
+	ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	rctx = aead_request_ctx(req);
+
+	dd->ctx = ctx;
+	rctx->dd = dd;
+	dd->aead_req = req;
+
+	rctx->mode &= FLAGS_MODE_MASK;
+	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
+
+	err = omap_aes_gcm_copy_buffers(dd, req);
+	if (err)
+		return err;
+
+	err = omap_aes_write_ctrl(dd);
+	if (!err)
+		err = omap_aes_crypt_dma_start(dd);
+
+	if (err) {
+		omap_aes_gcm_finish_req(dd, err);
+		omap_aes_gcm_handle_queue(dd, NULL);
+	}
+
+	return ret;
+}
+
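+/*
+ * Common entry point for all GCM variants: build J0 from the IV already
+ * copied into rctx->iv (appending a 32-bit counter of 1), pre-encrypt it,
+ * and queue the request.  A request with no data and no associated data is
+ * completed immediately, since its tag is simply E_K(J0).
+ */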
+static int omap_aes_gcm_crypt(struct aead_request *req, unsigned long mode)
+{
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int authlen = crypto_aead_authsize(aead);
+	struct omap_aes_dev *dd;
+	__be32 counter = cpu_to_be32(1);
+	int err, assoclen;
+
+	memset(rctx->auth_tag, 0, sizeof(rctx->auth_tag));
+	memcpy(rctx->iv + 12, &counter, 4);
+
+	err = do_encrypt_iv(req, (u32 *)rctx->auth_tag, (u32 *)rctx->iv);
+	if (err)
+		return err;
+
+	if (mode & FLAGS_RFC4106_GCM)
+		assoclen = req->assoclen - 8;
+	else
+		assoclen = req->assoclen;
+	if (assoclen + req->cryptlen == 0) {
+		scatterwalk_map_and_copy(rctx->auth_tag, req->dst, 0, authlen,
+					 1);
+		return 0;
+	}
+
+	dd = omap_aes_find_dev(rctx);
+	if (!dd)
+		return -ENODEV;
+	rctx->mode = mode;
+
+	return omap_aes_gcm_handle_queue(dd, req);
+}
+
+int omap_aes_gcm_encrypt(struct aead_request *req)
+{
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, req->iv, 12);
+	return omap_aes_gcm_crypt(req, FLAGS_ENCRYPT | FLAGS_GCM);
+}
+
+int omap_aes_gcm_decrypt(struct aead_request *req)
+{
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, req->iv, 12);
+	return omap_aes_gcm_crypt(req, FLAGS_GCM);
+}
+
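+/*
+ * RFC 4106 variants: the 96-bit nonce is the 4-byte salt stored at setkey
+ * time followed by the 8-byte explicit IV taken from the request.
+ */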
+int omap_aes_4106gcm_encrypt(struct aead_request *req)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, ctx->nonce, 4);
+	memcpy(rctx->iv + 4, req->iv, 8);
+	return omap_aes_gcm_crypt(req, FLAGS_ENCRYPT | FLAGS_GCM |
+				  FLAGS_RFC4106_GCM);
+}
+
+int omap_aes_4106gcm_decrypt(struct aead_request *req)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct omap_aes_reqctx *rctx = aead_request_ctx(req);
+
+	memcpy(rctx->iv, ctx->nonce, 4);
+	memcpy(rctx->iv + 4, req->iv, 8);
+	return omap_aes_gcm_crypt(req, FLAGS_GCM | FLAGS_RFC4106_GCM);
+}
+
+int omap_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			unsigned int keylen)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
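+/*
+ * For rfc4106(gcm(aes)) the key material is the AES key followed by a
+ * 4-byte salt; split it and keep the salt as the nonce prefix.
+ */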
+int omap_aes_4106gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (keylen < 4)
+		return -EINVAL;
+
+	keylen -= 4;
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key, key, keylen);
+	memcpy(ctx->nonce, key + keylen, 4);
+	ctx->keylen = keylen;
+
+	return 0;
+}

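The functions above implement the gcm(aes) and rfc4106(gcm(aes)) AEAD transforms that omap-aes.c registers below, so any in-kernel user reaches them through the usual crypto_aead API. Below is a minimal sketch of one-shot GCM encryption using the same completion pattern as do_encrypt_iv(); gcm_encrypt_once(), gcm_op_done() and struct gcm_result are illustrative names, not part of this patch, and crypto_alloc_aead("gcm(aes)", 0, 0) simply picks the highest-priority provider, which on OMAP4-class hardware can be the gcm-aes-omap driver added here.

#include <crypto/aead.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

struct gcm_result {
	struct completion completion;
	int err;
};

static void gcm_op_done(struct crypto_async_request *req, int err)
{
	struct gcm_result *res = req->data;

	if (err == -EINPROGRESS)
		return;
	res->err = err;
	complete(&res->completion);
}

/*
 * Encrypt @len bytes in @buf in place with a 12-byte @iv; @buf must have
 * room for the 16-byte tag that is appended to the ciphertext.
 */
static int gcm_encrypt_once(const u8 *key, unsigned int keylen,
			    u8 *iv, u8 *buf, unsigned int len)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	struct gcm_result res;
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);
	if (!err)
		err = crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	init_completion(&res.completion);
	sg_init_one(&sg, buf, len + 16);	/* plaintext + room for tag */

	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  gcm_op_done, &res);
	aead_request_set_ad(req, 0);		/* no associated data */
	aead_request_set_crypt(req, &sg, &sg, len, iv);

	err = crypto_aead_encrypt(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&res.completion);
		err = res.err;
	}

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}

Decryption is the mirror image: call crypto_aead_decrypt() with cryptlen set to len + 16, and a tag mismatch is reported as -EBADMSG.
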
+ 195 - 266
drivers/crypto/omap-aes.c

@@ -37,155 +37,10 @@
 #include <crypto/aes.h>
 #include <crypto/engine.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/aead.h>
 
-#define DST_MAXBURST			4
-#define DMA_MIN				(DST_MAXBURST * sizeof(u32))
-
-#define _calc_walked(inout) (dd->inout##_walk.offset - dd->inout##_sg->offset)
-
-/* OMAP TRM gives bitfields as start:end, where start is the higher bit
-   number. For example 7:0 */
-#define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
-#define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
-
-#define AES_REG_KEY(dd, x)		((dd)->pdata->key_ofs - \
-						((x ^ 0x01) * 0x04))
-#define AES_REG_IV(dd, x)		((dd)->pdata->iv_ofs + ((x) * 0x04))
-
-#define AES_REG_CTRL(dd)		((dd)->pdata->ctrl_ofs)
-#define AES_REG_CTRL_CTR_WIDTH_MASK	GENMASK(8, 7)
-#define AES_REG_CTRL_CTR_WIDTH_32	0
-#define AES_REG_CTRL_CTR_WIDTH_64	BIT(7)
-#define AES_REG_CTRL_CTR_WIDTH_96	BIT(8)
-#define AES_REG_CTRL_CTR_WIDTH_128	GENMASK(8, 7)
-#define AES_REG_CTRL_CTR		BIT(6)
-#define AES_REG_CTRL_CBC		BIT(5)
-#define AES_REG_CTRL_KEY_SIZE		GENMASK(4, 3)
-#define AES_REG_CTRL_DIRECTION		BIT(2)
-#define AES_REG_CTRL_INPUT_READY	BIT(1)
-#define AES_REG_CTRL_OUTPUT_READY	BIT(0)
-#define AES_REG_CTRL_MASK		GENMASK(24, 2)
-
-#define AES_REG_DATA_N(dd, x)		((dd)->pdata->data_ofs + ((x) * 0x04))
-
-#define AES_REG_REV(dd)			((dd)->pdata->rev_ofs)
-
-#define AES_REG_MASK(dd)		((dd)->pdata->mask_ofs)
-#define AES_REG_MASK_SIDLE		BIT(6)
-#define AES_REG_MASK_START		BIT(5)
-#define AES_REG_MASK_DMA_OUT_EN		BIT(3)
-#define AES_REG_MASK_DMA_IN_EN		BIT(2)
-#define AES_REG_MASK_SOFTRESET		BIT(1)
-#define AES_REG_AUTOIDLE		BIT(0)
-
-#define AES_REG_LENGTH_N(x)		(0x54 + ((x) * 0x04))
-
-#define AES_REG_IRQ_STATUS(dd)         ((dd)->pdata->irq_status_ofs)
-#define AES_REG_IRQ_ENABLE(dd)         ((dd)->pdata->irq_enable_ofs)
-#define AES_REG_IRQ_DATA_IN            BIT(1)
-#define AES_REG_IRQ_DATA_OUT           BIT(2)
-#define DEFAULT_TIMEOUT		(5*HZ)
-
-#define DEFAULT_AUTOSUSPEND_DELAY	1000
-
-#define FLAGS_MODE_MASK		0x000f
-#define FLAGS_ENCRYPT		BIT(0)
-#define FLAGS_CBC		BIT(1)
-#define FLAGS_GIV		BIT(2)
-#define FLAGS_CTR		BIT(3)
-
-#define FLAGS_INIT		BIT(4)
-#define FLAGS_FAST		BIT(5)
-#define FLAGS_BUSY		BIT(6)
-
-#define AES_BLOCK_WORDS		(AES_BLOCK_SIZE >> 2)
-
-struct omap_aes_ctx {
-	struct omap_aes_dev *dd;
-
-	int		keylen;
-	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
-	unsigned long	flags;
-	struct crypto_skcipher	*fallback;
-};
-
-struct omap_aes_reqctx {
-	unsigned long mode;
-};
-
-#define OMAP_AES_QUEUE_LENGTH	1
-#define OMAP_AES_CACHE_SIZE	0
-
-struct omap_aes_algs_info {
-	struct crypto_alg	*algs_list;
-	unsigned int		size;
-	unsigned int		registered;
-};
-
-struct omap_aes_pdata {
-	struct omap_aes_algs_info	*algs_info;
-	unsigned int	algs_info_size;
-
-	void		(*trigger)(struct omap_aes_dev *dd, int length);
-
-	u32		key_ofs;
-	u32		iv_ofs;
-	u32		ctrl_ofs;
-	u32		data_ofs;
-	u32		rev_ofs;
-	u32		mask_ofs;
-	u32             irq_enable_ofs;
-	u32             irq_status_ofs;
-
-	u32		dma_enable_in;
-	u32		dma_enable_out;
-	u32		dma_start;
-
-	u32		major_mask;
-	u32		major_shift;
-	u32		minor_mask;
-	u32		minor_shift;
-};
-
-struct omap_aes_dev {
-	struct list_head	list;
-	unsigned long		phys_base;
-	void __iomem		*io_base;
-	struct omap_aes_ctx	*ctx;
-	struct device		*dev;
-	unsigned long		flags;
-	int			err;
-
-	struct tasklet_struct	done_task;
-
-	struct ablkcipher_request	*req;
-	struct crypto_engine		*engine;
-
-	/*
-	 * total is used by PIO mode for book keeping so introduce
-	 * variable total_save as need it to calc page_order
-	 */
-	size_t				total;
-	size_t				total_save;
-
-	struct scatterlist		*in_sg;
-	struct scatterlist		*out_sg;
-
-	/* Buffers for copying for unaligned cases */
-	struct scatterlist		in_sgl;
-	struct scatterlist		out_sgl;
-	struct scatterlist		*orig_out;
-	int				sgs_copied;
-
-	struct scatter_walk		in_walk;
-	struct scatter_walk		out_walk;
-	struct dma_chan		*dma_lch_in;
-	struct dma_chan		*dma_lch_out;
-	int			in_sg_len;
-	int			out_sg_len;
-	int			pio_only;
-	const struct omap_aes_pdata	*pdata;
-};
+#include "omap-crypto.h"
+#include "omap-aes.h"
 
 /* keep registered devices data here */
 static LIST_HEAD(dev_list);
@@ -201,7 +56,7 @@ static DEFINE_SPINLOCK(list_lock);
 	_read_ret;						\
 })
 #else
-static inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
+inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
 {
 	return __raw_readl(dd->io_base + offset);
 }
@@ -215,7 +70,7 @@ static inline u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset)
 		__raw_writel(value, dd->io_base + offset);		\
 	} while (0)
 #else
-static inline void omap_aes_write(struct omap_aes_dev *dd, u32 offset,
+inline void omap_aes_write(struct omap_aes_dev *dd, u32 offset,
 				  u32 value)
 {
 	__raw_writel(value, dd->io_base + offset);
@@ -258,8 +113,16 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd)
 	return 0;
 }
 
-static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
+void omap_aes_clear_copy_flags(struct omap_aes_dev *dd)
+{
+	dd->flags &= ~(OMAP_CRYPTO_COPY_MASK << FLAGS_IN_DATA_ST_SHIFT);
+	dd->flags &= ~(OMAP_CRYPTO_COPY_MASK << FLAGS_OUT_DATA_ST_SHIFT);
+	dd->flags &= ~(OMAP_CRYPTO_COPY_MASK << FLAGS_ASSOC_DATA_ST_SHIFT);
+}
+
+int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 {
+	struct omap_aes_reqctx *rctx;
 	unsigned int key32;
 	int i, err;
 	u32 val;
@@ -270,7 +133,11 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 
 	key32 = dd->ctx->keylen / sizeof(u32);
 
-	/* it seems a key should always be set even if it has not changed */
+	/* RESET the key as previous HASH keys should not get affected */
+	if (dd->flags & FLAGS_GCM)
+		for (i = 0; i < 0x40; i = i + 4)
+			omap_aes_write(dd, i, 0x0);
+
 	for (i = 0; i < key32; i++) {
 		omap_aes_write(dd, AES_REG_KEY(dd, i),
 			__le32_to_cpu(dd->ctx->key[i]));
@@ -279,12 +146,21 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 	if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->info)
 		omap_aes_write_n(dd, AES_REG_IV(dd, 0), dd->req->info, 4);
 
+	if ((dd->flags & (FLAGS_GCM)) && dd->aead_req->iv) {
+		rctx = aead_request_ctx(dd->aead_req);
+		omap_aes_write_n(dd, AES_REG_IV(dd, 0), (u32 *)rctx->iv, 4);
+	}
+
 	val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
 	if (dd->flags & FLAGS_CBC)
 		val |= AES_REG_CTRL_CBC;
-	if (dd->flags & FLAGS_CTR)
+
+	if (dd->flags & (FLAGS_CTR | FLAGS_GCM))
 		val |= AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_128;
 
+	if (dd->flags & FLAGS_GCM)
+		val |= AES_REG_CTRL_GCM;
+
 	if (dd->flags & FLAGS_ENCRYPT)
 		val |= AES_REG_CTRL_DIRECTION;
 
@@ -315,6 +191,8 @@ static void omap_aes_dma_trigger_omap4(struct omap_aes_dev *dd, int length)
 {
 	omap_aes_write(dd, AES_REG_LENGTH_N(0), length);
 	omap_aes_write(dd, AES_REG_LENGTH_N(1), 0);
+	if (dd->flags & FLAGS_GCM)
+		omap_aes_write(dd, AES_REG_A_LEN, dd->assoc_len);
 
 	omap_aes_dma_trigger_omap2(dd, length);
 }
@@ -329,14 +207,14 @@ static void omap_aes_dma_stop(struct omap_aes_dev *dd)
 	omap_aes_write_mask(dd, AES_REG_MASK(dd), 0, mask);
 }
 
-static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
+struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_reqctx *rctx)
 {
 	struct omap_aes_dev *dd;
 
 	spin_lock_bh(&list_lock);
 	dd = list_first_entry(&dev_list, struct omap_aes_dev, list);
 	list_move_tail(&dd->list, &dev_list);
-	ctx->dd = dd;
+	rctx->dd = dd;
 	spin_unlock_bh(&list_lock);
 
 	return dd;
@@ -387,26 +265,11 @@ static void omap_aes_dma_cleanup(struct omap_aes_dev *dd)
 	dma_release_channel(dd->dma_lch_in);
 }
 
-static void sg_copy_buf(void *buf, struct scatterlist *sg,
-			      unsigned int start, unsigned int nbytes, int out)
+static int omap_aes_crypt_dma(struct omap_aes_dev *dd,
+			      struct scatterlist *in_sg,
+			      struct scatterlist *out_sg,
+			      int in_sg_len, int out_sg_len)
 {
-	struct scatter_walk walk;
-
-	if (!nbytes)
-		return;
-
-	scatterwalk_start(&walk, sg);
-	scatterwalk_advance(&walk, start);
-	scatterwalk_copychunks(buf, &walk, nbytes, out);
-	scatterwalk_done(&walk, out, 0);
-}
-
-static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
-		struct scatterlist *in_sg, struct scatterlist *out_sg,
-		int in_sg_len, int out_sg_len)
-{
-	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct omap_aes_dev *dd = ctx->dd;
 	struct dma_async_tx_descriptor *tx_in, *tx_out;
 	struct dma_slave_config cfg;
 	int ret;
@@ -467,7 +330,10 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
 		return -EINVAL;
 	}
 
-	tx_out->callback = omap_aes_dma_out_callback;
+	if (dd->flags & FLAGS_GCM)
+		tx_out->callback = omap_aes_gcm_dma_out_callback;
+	else
+		tx_out->callback = omap_aes_dma_out_callback;
 	tx_out->callback_param = dd;
 
 	dmaengine_submit(tx_in);
@@ -482,10 +348,8 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
 	return 0;
 }
 
-static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
+int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(
-					crypto_ablkcipher_reqtfm(dd->req));
 	int err;
 
 	pr_debug("total: %d\n", dd->total);
@@ -506,7 +370,7 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 		}
 	}
 
-	err = omap_aes_crypt_dma(tfm, dd->in_sg, dd->out_sg, dd->in_sg_len,
+	err = omap_aes_crypt_dma(dd, dd->in_sg, dd->out_sg, dd->in_sg_len,
 				 dd->out_sg_len);
 	if (err && !dd->pio_only) {
 		dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE);
@@ -529,7 +393,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 	pm_runtime_put_autosuspend(dd->dev);
 }
 
-static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
+int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 {
 	pr_debug("total: %d\n", dd->total);
 
@@ -539,62 +403,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 	return 0;
 }
 
-static int omap_aes_check_aligned(struct scatterlist *sg, int total)
-{
-	int len = 0;
-
-	if (!IS_ALIGNED(total, AES_BLOCK_SIZE))
-		return -EINVAL;
-
-	while (sg) {
-		if (!IS_ALIGNED(sg->offset, 4))
-			return -1;
-		if (!IS_ALIGNED(sg->length, AES_BLOCK_SIZE))
-			return -1;
-
-		len += sg->length;
-		sg = sg_next(sg);
-	}
-
-	if (len != total)
-		return -1;
-
-	return 0;
-}
-
-static int omap_aes_copy_sgs(struct omap_aes_dev *dd)
-{
-	void *buf_in, *buf_out;
-	int pages, total;
-
-	total = ALIGN(dd->total, AES_BLOCK_SIZE);
-	pages = get_order(total);
-
-	buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
-	buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
-
-	if (!buf_in || !buf_out) {
-		pr_err("Couldn't allocated pages for unaligned cases.\n");
-		return -1;
-	}
-
-	dd->orig_out = dd->out_sg;
-
-	sg_copy_buf(buf_in, dd->in_sg, 0, dd->total, 0);
-
-	sg_init_table(&dd->in_sgl, 1);
-	sg_set_buf(&dd->in_sgl, buf_in, total);
-	dd->in_sg = &dd->in_sgl;
-	dd->in_sg_len = 1;
-
-	sg_init_table(&dd->out_sgl, 1);
-	sg_set_buf(&dd->out_sgl, buf_out, total);
-	dd->out_sg = &dd->out_sgl;
-	dd->out_sg_len = 1;
-
-	return 0;
-}
-
 static int omap_aes_handle_queue(struct omap_aes_dev *dd,
 				 struct ablkcipher_request *req)
 {
@@ -609,8 +417,10 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 {
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_dev *dd = ctx->dd;
-	struct omap_aes_reqctx *rctx;
+	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_aes_dev *dd = rctx->dd;
+	int ret;
+	u16 flags;
 
 	if (!dd)
 		return -ENODEV;
@@ -621,6 +431,23 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 	dd->total_save = req->nbytes;
 	dd->in_sg = req->src;
 	dd->out_sg = req->dst;
+	dd->orig_out = req->dst;
+
+	flags = OMAP_CRYPTO_COPY_DATA;
+	if (req->src == req->dst)
+		flags |= OMAP_CRYPTO_FORCE_COPY;
+
+	ret = omap_crypto_align_sg(&dd->in_sg, dd->total, AES_BLOCK_SIZE,
+				   dd->in_sgl, flags,
+				   FLAGS_IN_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
+
+	ret = omap_crypto_align_sg(&dd->out_sg, dd->total, AES_BLOCK_SIZE,
+				   &dd->out_sgl, 0,
+				   FLAGS_OUT_DATA_ST_SHIFT, &dd->flags);
+	if (ret)
+		return ret;
 
 	dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total);
 	if (dd->in_sg_len < 0)
@@ -630,22 +457,11 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 	if (dd->out_sg_len < 0)
 		return dd->out_sg_len;
 
-	if (omap_aes_check_aligned(dd->in_sg, dd->total) ||
-	    omap_aes_check_aligned(dd->out_sg, dd->total)) {
-		if (omap_aes_copy_sgs(dd))
-			pr_err("Failed to copy SGs for unaligned cases\n");
-		dd->sgs_copied = 1;
-	} else {
-		dd->sgs_copied = 0;
-	}
-
-	rctx = ablkcipher_request_ctx(req);
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
 	rctx->mode &= FLAGS_MODE_MASK;
 	dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode;
 
 	dd->ctx = ctx;
-	ctx->dd = dd;
+	rctx->dd = dd;
 
 	return omap_aes_write_ctrl(dd);
 }
@@ -653,9 +469,8 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 static int omap_aes_crypt_req(struct crypto_engine *engine,
 			      struct ablkcipher_request *req)
 {
-	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
-			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_dev *dd = ctx->dd;
+	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct omap_aes_dev *dd = rctx->dd;
 
 	if (!dd)
 		return -ENODEV;
@@ -666,8 +481,6 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 static void omap_aes_done_task(unsigned long data)
 {
 	struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
-	void *buf_in, *buf_out;
-	int pages, len;
 
 	pr_debug("enter done_task\n");
 
@@ -680,17 +493,11 @@ static void omap_aes_done_task(unsigned long data)
 		omap_aes_crypt_dma_stop(dd);
 	}
 
-	if (dd->sgs_copied) {
-		buf_in = sg_virt(&dd->in_sgl);
-		buf_out = sg_virt(&dd->out_sgl);
+	omap_crypto_cleanup(dd->in_sgl, NULL, 0, dd->total_save,
+			    FLAGS_IN_DATA_ST_SHIFT, dd->flags);
 
-		sg_copy_buf(buf_out, dd->orig_out, 0, dd->total_save, 1);
-
-		len = ALIGN(dd->total_save, AES_BLOCK_SIZE);
-		pages = get_order(len);
-		free_pages((unsigned long)buf_in, pages);
-		free_pages((unsigned long)buf_out, pages);
-	}
+	omap_crypto_cleanup(&dd->out_sgl, dd->orig_out, 0, dd->total_save,
+			    FLAGS_OUT_DATA_ST_SHIFT, dd->flags);
 
 	omap_aes_finish_req(dd, 0);
 
@@ -726,7 +533,7 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 		skcipher_request_zero(subreq);
 		return ret;
 	}
-	dd = omap_aes_find_dev(ctx);
+	dd = omap_aes_find_dev(rctx);
 	if (!dd)
 		return -ENODEV;
 
@@ -811,6 +618,36 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int omap_aes_gcm_cra_init(struct crypto_aead *tfm)
+{
+	struct omap_aes_dev *dd = NULL;
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+	int err;
+
+	/* Find AES device, currently picks the first device */
+	spin_lock_bh(&list_lock);
+	list_for_each_entry(dd, &dev_list, list) {
+		break;
+	}
+	spin_unlock_bh(&list_lock);
+
+	err = pm_runtime_get_sync(dd->dev);
+	if (err < 0) {
+		dev_err(dd->dev, "%s: failed to get_sync(%d)\n",
+			__func__, err);
+		return err;
+	}
+
+	tfm->reqsize = sizeof(struct omap_aes_reqctx);
+	ctx->ctr = crypto_alloc_skcipher("ecb(aes)", 0, 0);
+	if (IS_ERR(ctx->ctr)) {
+		pr_warn("could not load aes driver for encrypting IV\n");
+		return PTR_ERR(ctx->ctr);
+	}
+
+	return 0;
+}
+
 static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 {
 	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -821,6 +658,16 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 	ctx->fallback = NULL;
 }
 
+static void omap_aes_gcm_cra_exit(struct crypto_aead *tfm)
+{
+	struct omap_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	omap_aes_cra_exit(crypto_aead_tfm(tfm));
+
+	if (ctx->ctr)
+		crypto_free_skcipher(ctx->ctr);
+}
+
 /* ********************** ALGS ************************************ */
 
 static struct crypto_alg algs_ecb_cbc[] = {
@@ -905,6 +752,54 @@ static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = {
 	},
 };
 
+static struct aead_alg algs_aead_gcm[] = {
+{
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "gcm-aes-omap",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+	.init		= omap_aes_gcm_cra_init,
+	.exit		= omap_aes_gcm_cra_exit,
+	.ivsize		= 12,
+	.maxauthsize	= AES_BLOCK_SIZE,
+	.setkey		= omap_aes_gcm_setkey,
+	.encrypt	= omap_aes_gcm_encrypt,
+	.decrypt	= omap_aes_gcm_decrypt,
+},
+{
+	.base = {
+		.cra_name		= "rfc4106(gcm(aes))",
+		.cra_driver_name	= "rfc4106-gcm-aes-omap",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_ASYNC |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct omap_aes_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+	.init		= omap_aes_gcm_cra_init,
+	.exit		= omap_aes_gcm_cra_exit,
+	.maxauthsize	= AES_BLOCK_SIZE,
+	.ivsize		= 8,
+	.setkey		= omap_aes_4106gcm_setkey,
+	.encrypt	= omap_aes_4106gcm_encrypt,
+	.decrypt	= omap_aes_4106gcm_decrypt,
+},
+};
+
+static struct omap_aes_aead_algs omap_aes_aead_info = {
+	.algs_list	=	algs_aead_gcm,
+	.size		=	ARRAY_SIZE(algs_aead_gcm),
+};
+
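+/*
+ * Only pdata that sets .aead_algs_info (omap_aes_pdata_omap4 below) causes
+ * these AEAD algorithms to be registered in omap_aes_probe().
+ */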
 static const struct omap_aes_pdata omap_aes_pdata_omap2 = {
 	.algs_info	= omap_aes_algs_info_ecb_cbc,
 	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc),
@@ -958,6 +853,7 @@ static const struct omap_aes_pdata omap_aes_pdata_omap3 = {
 static const struct omap_aes_pdata omap_aes_pdata_omap4 = {
 	.algs_info	= omap_aes_algs_info_ecb_cbc_ctr,
 	.algs_info_size	= ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr),
+	.aead_algs_info	= &omap_aes_aead_info,
 	.trigger	= omap_aes_dma_trigger_omap4,
 	.key_ofs	= 0x3c,
 	.iv_ofs		= 0x40,
@@ -1140,6 +1036,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct omap_aes_dev *dd;
 	struct crypto_alg *algp;
+	struct aead_alg *aalg;
 	struct resource res;
 	int err = -ENOMEM, i, j, irq = -1;
 	u32 reg;
@@ -1152,6 +1049,8 @@ static int omap_aes_probe(struct platform_device *pdev)
 	dd->dev = dev;
 	platform_set_drvdata(pdev, dd);
 
+	aead_init_queue(&dd->aead_queue, OMAP_AES_QUEUE_LENGTH);
+
 	err = (dev->of_node) ? omap_aes_get_res_of(dd, dev, &res) :
 			       omap_aes_get_res_pdev(dd, pdev, &res);
 	if (err)
@@ -1207,6 +1106,7 @@ static int omap_aes_probe(struct platform_device *pdev)
 		}
 	}
 
+	spin_lock_init(&dd->lock);
 
 	INIT_LIST_HEAD(&dd->list);
 	spin_lock(&list_lock);
@@ -1243,7 +1143,29 @@ static int omap_aes_probe(struct platform_device *pdev)
 		}
 	}
 
+	if (dd->pdata->aead_algs_info &&
+	    !dd->pdata->aead_algs_info->registered) {
+		for (i = 0; i < dd->pdata->aead_algs_info->size; i++) {
+			aalg = &dd->pdata->aead_algs_info->algs_list[i];
+			algp = &aalg->base;
+
+			pr_debug("reg alg: %s\n", algp->cra_name);
+			INIT_LIST_HEAD(&algp->cra_list);
+
+			err = crypto_register_aead(aalg);
+			if (err)
+				goto err_aead_algs;
+
+			dd->pdata->aead_algs_info->registered++;
+		}
+	}
+
 	return 0;
+err_aead_algs:
+	for (i = dd->pdata->aead_algs_info->registered - 1; i >= 0; i--) {
+		aalg = &dd->pdata->aead_algs_info->algs_list[i];
+		crypto_unregister_aead(aalg);
+	}
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
@@ -1268,6 +1190,7 @@ err_data:
 static int omap_aes_remove(struct platform_device *pdev)
 {
 	struct omap_aes_dev *dd = platform_get_drvdata(pdev);
+	struct aead_alg *aalg;
 	int i, j;
 
 	if (!dd)
@@ -1282,7 +1205,13 @@ static int omap_aes_remove(struct platform_device *pdev)
 			crypto_unregister_alg(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
+	for (i = dd->pdata->aead_algs_info->size - 1; i >= 0; i--) {
+		aalg = &dd->pdata->aead_algs_info->algs_list[i];
+		crypto_unregister_aead(aalg);
+	}
+
 	crypto_engine_exit(dd->engine);
+
 	tasklet_kill(&dd->done_task);
 	omap_aes_dma_cleanup(dd);
 	pm_runtime_disable(dd->dev);

+ 214 - 0
drivers/crypto/omap-aes.h

@@ -0,0 +1,214 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for OMAP AES HW ACCELERATOR defines
+ *
+ * Copyright (c) 2015 Texas Instruments Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef __OMAP_AES_H__
+#define __OMAP_AES_H__
+
+#define DST_MAXBURST			4
+#define DMA_MIN				(DST_MAXBURST * sizeof(u32))
+
+#define _calc_walked(inout) (dd->inout##_walk.offset - dd->inout##_sg->offset)
+
+/*
+ * OMAP TRM gives bitfields as start:end, where start is the higher bit
+ * number. For example 7:0
+ */
+#define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
+#define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
+
+#define AES_REG_KEY(dd, x)		((dd)->pdata->key_ofs - \
+						(((x) ^ 0x01) * 0x04))
+#define AES_REG_IV(dd, x)		((dd)->pdata->iv_ofs + ((x) * 0x04))
+
+#define AES_REG_CTRL(dd)		((dd)->pdata->ctrl_ofs)
+#define AES_REG_CTRL_CONTEXT_READY	BIT(31)
+#define AES_REG_CTRL_CTR_WIDTH_MASK	GENMASK(8, 7)
+#define AES_REG_CTRL_CTR_WIDTH_32	0
+#define AES_REG_CTRL_CTR_WIDTH_64	BIT(7)
+#define AES_REG_CTRL_CTR_WIDTH_96	BIT(8)
+#define AES_REG_CTRL_CTR_WIDTH_128	GENMASK(8, 7)
+#define AES_REG_CTRL_GCM		GENMASK(17, 16)
+#define AES_REG_CTRL_CTR		BIT(6)
+#define AES_REG_CTRL_CBC		BIT(5)
+#define AES_REG_CTRL_KEY_SIZE		GENMASK(4, 3)
+#define AES_REG_CTRL_DIRECTION		BIT(2)
+#define AES_REG_CTRL_INPUT_READY	BIT(1)
+#define AES_REG_CTRL_OUTPUT_READY	BIT(0)
+#define AES_REG_CTRL_MASK		GENMASK(24, 2)
+
+#define AES_REG_C_LEN_0			0x54
+#define AES_REG_C_LEN_1			0x58
+#define AES_REG_A_LEN			0x5C
+
+#define AES_REG_DATA_N(dd, x)		((dd)->pdata->data_ofs + ((x) * 0x04))
+#define AES_REG_TAG_N(dd, x)		(0x70 + ((x) * 0x04))
+
+#define AES_REG_REV(dd)			((dd)->pdata->rev_ofs)
+
+#define AES_REG_MASK(dd)		((dd)->pdata->mask_ofs)
+#define AES_REG_MASK_SIDLE		BIT(6)
+#define AES_REG_MASK_START		BIT(5)
+#define AES_REG_MASK_DMA_OUT_EN		BIT(3)
+#define AES_REG_MASK_DMA_IN_EN		BIT(2)
+#define AES_REG_MASK_SOFTRESET		BIT(1)
+#define AES_REG_AUTOIDLE		BIT(0)
+
+#define AES_REG_LENGTH_N(x)		(0x54 + ((x) * 0x04))
+
+#define AES_REG_IRQ_STATUS(dd)         ((dd)->pdata->irq_status_ofs)
+#define AES_REG_IRQ_ENABLE(dd)         ((dd)->pdata->irq_enable_ofs)
+#define AES_REG_IRQ_DATA_IN            BIT(1)
+#define AES_REG_IRQ_DATA_OUT           BIT(2)
+#define DEFAULT_TIMEOUT		(5 * HZ)
+
+#define DEFAULT_AUTOSUSPEND_DELAY	1000
+
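+/*
+ * dd->flags layout: bits 0-4 hold the mode (FLAGS_MODE_MASK), bits 5-7 the
+ * driver state, and the fields starting at FLAGS_*_DATA_ST_SHIFT record how
+ * omap_crypto_align_sg() copied or re-aligned each scatterlist so that
+ * omap_crypto_cleanup() can undo it (see omap-crypto.h).
+ */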
+#define FLAGS_MODE_MASK		0x001f
+#define FLAGS_ENCRYPT		BIT(0)
+#define FLAGS_CBC		BIT(1)
+#define FLAGS_CTR		BIT(2)
+#define FLAGS_GCM		BIT(3)
+#define FLAGS_RFC4106_GCM	BIT(4)
+
+#define FLAGS_INIT		BIT(5)
+#define FLAGS_FAST		BIT(6)
+#define FLAGS_BUSY		BIT(7)
+
+#define FLAGS_IN_DATA_ST_SHIFT	8
+#define FLAGS_OUT_DATA_ST_SHIFT	10
+#define FLAGS_ASSOC_DATA_ST_SHIFT	12
+
+#define AES_BLOCK_WORDS		(AES_BLOCK_SIZE >> 2)
+
+struct omap_aes_gcm_result {
+	struct completion completion;
+	int err;
+};
+
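+/*
+ * Despite its name, ctx->ctr is an "ecb(aes)" transform allocated in
+ * omap_aes_gcm_cra_init() and used only to pre-encrypt the initial GCM
+ * counter block; ctx->nonce holds the 4-byte RFC 4106 salt.
+ */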
+struct omap_aes_ctx {
+	int		keylen;
+	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
+	u8		nonce[4];
+	struct crypto_skcipher	*fallback;
+	struct crypto_skcipher	*ctr;
+};
+
+struct omap_aes_reqctx {
+	struct omap_aes_dev *dd;
+	unsigned long mode;
+	u8 iv[AES_BLOCK_SIZE];
+	u32 auth_tag[AES_BLOCK_SIZE / sizeof(u32)];
+};
+
+#define OMAP_AES_QUEUE_LENGTH	1
+#define OMAP_AES_CACHE_SIZE	0
+
+struct omap_aes_algs_info {
+	struct crypto_alg	*algs_list;
+	unsigned int		size;
+	unsigned int		registered;
+};
+
+struct omap_aes_aead_algs {
+	struct aead_alg	*algs_list;
+	unsigned int	size;
+	unsigned int	registered;
+};
+
+struct omap_aes_pdata {
+	struct omap_aes_algs_info	*algs_info;
+	unsigned int	algs_info_size;
+	struct omap_aes_aead_algs	*aead_algs_info;
+
+	void		(*trigger)(struct omap_aes_dev *dd, int length);
+
+	u32		key_ofs;
+	u32		iv_ofs;
+	u32		ctrl_ofs;
+	u32		data_ofs;
+	u32		rev_ofs;
+	u32		mask_ofs;
+	u32             irq_enable_ofs;
+	u32             irq_status_ofs;
+
+	u32		dma_enable_in;
+	u32		dma_enable_out;
+	u32		dma_start;
+
+	u32		major_mask;
+	u32		major_shift;
+	u32		minor_mask;
+	u32		minor_shift;
+};
+
+struct omap_aes_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	void __iomem		*io_base;
+	struct omap_aes_ctx	*ctx;
+	struct device		*dev;
+	unsigned long		flags;
+	int			err;
+
+	struct tasklet_struct	done_task;
+	struct aead_queue	aead_queue;
+	spinlock_t		lock;
+
+	struct ablkcipher_request	*req;
+	struct aead_request		*aead_req;
+	struct crypto_engine		*engine;
+
+	/*
+	 * total is used by PIO mode for book keeping so introduce
+	 * variable total_save as need it to calc page_order
+	 */
+	size_t				total;
+	size_t				total_save;
+	size_t				assoc_len;
+	size_t				authsize;
+
+	struct scatterlist		*in_sg;
+	struct scatterlist		*out_sg;
+
+	/* Buffers for copying for unaligned cases */
+	struct scatterlist		in_sgl[2];
+	struct scatterlist		out_sgl;
+	struct scatterlist		*orig_out;
+
+	struct scatter_walk		in_walk;
+	struct scatter_walk		out_walk;
+	struct dma_chan		*dma_lch_in;
+	struct dma_chan		*dma_lch_out;
+	int			in_sg_len;
+	int			out_sg_len;
+	int			pio_only;
+	const struct omap_aes_pdata	*pdata;
+};
+
+u32 omap_aes_read(struct omap_aes_dev *dd, u32 offset);
+void omap_aes_write(struct omap_aes_dev *dd, u32 offset, u32 value);
+struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_reqctx *rctx);
+int omap_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			unsigned int keylen);
+int omap_aes_4106gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			    unsigned int keylen);
+int omap_aes_gcm_encrypt(struct aead_request *req);
+int omap_aes_gcm_decrypt(struct aead_request *req);
+int omap_aes_4106gcm_encrypt(struct aead_request *req);
+int omap_aes_4106gcm_decrypt(struct aead_request *req);
+int omap_aes_write_ctrl(struct omap_aes_dev *dd);
+int omap_aes_crypt_dma_start(struct omap_aes_dev *dd);
+int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd);
+void omap_aes_gcm_dma_out_callback(void *data);
+void omap_aes_clear_copy_flags(struct omap_aes_dev *dd);
+
+#endif

Some files were not shown because too many files changed in this diff