Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.9:

  API:
   - The crypto engine code now supports hashes.

  Algorithms:
   - Allow keys >= 2048 bits in FIPS mode for RSA.

  Drivers:
   - Memory overwrite fix for vmx ghash.
   - Add support for building ARM sha1-neon in Thumb2 mode.
   - Reenable ARM ghash-ce code by adding import/export.
   - Reenable img-hash by adding import/export.
   - Add support for multiple cores in omap-aes.
   - Add little-endian support for sha1-powerpc.
   - Add Cavium HWRNG driver for ThunderX SoC"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (137 commits)
  crypto: caam - treat SGT address pointer as u64
  crypto: ccp - Make syslog errors human-readable
  crypto: ccp - clean up data structure
  crypto: vmx - Ensure ghash-generic is enabled
  crypto: testmgr - add guard to dst buffer for ahash_export
  crypto: caam - Unmap region obtained by of_iomap
  crypto: sha1-powerpc - little-endian support
  crypto: gcm - Fix IV buffer size in crypto_gcm_setkey
  crypto: vmx - Fix memory corruption caused by p8_ghash
  crypto: ghash-generic - move common definitions to a new header file
  crypto: caam - fix sg dump
  hwrng: omap - Only fail if pm_runtime_get_sync returns < 0
  crypto: omap-sham - shrink the internal buffer size
  crypto: omap-sham - add support for export/import
  crypto: omap-sham - convert driver logic to use sgs for data xmit
  crypto: omap-sham - change the DMA threshold value to a define
  crypto: omap-sham - add support functions for sg based data handling
  crypto: omap-sham - rename sgl to sgl_tmp for deprecation
  crypto: omap-sham - align algorithms on word offset
  crypto: omap-sham - add context export/import stubs
  ...
Linus Torvalds, 8 years ago
parent commit 30066ce675
77 changed files with 3852 additions and 1654 deletions
  1. Documentation/DocBook/crypto-API.tmpl (+26 -12)
  2. arch/arm/crypto/ghash-ce-glue.c (+25 -1)
  3. arch/arm/crypto/sha1-armv7-neon.S (+0 -1)
  4. arch/powerpc/crypto/sha1-powerpc-asm.S (+11 -2)
  5. crypto/algif_hash.c (+61 -12)
  6. crypto/crct10dif_generic.c (+1 -4)
  7. crypto/crypto_engine.c (+149 -38)
  8. crypto/drbg.c (+15 -16)
  9. crypto/gcm.c (+1 -1)
  10. crypto/ghash-generic.c (+1 -12)
  11. crypto/mcryptd.c (+1 -6)
  12. crypto/rsa_helper.c (+2 -2)
  13. crypto/testmgr.c (+18 -6)
  14. crypto/testmgr.h (+4 -0)
  15. crypto/xor.c (+20 -21)
  16. crypto/xts.c (+1 -1)
  17. drivers/char/hw_random/Kconfig (+13 -0)
  18. drivers/char/hw_random/Makefile (+1 -0)
  19. drivers/char/hw_random/amd-rng.c (+78 -62)
  20. drivers/char/hw_random/bcm2835-rng.c (+3 -2)
  21. drivers/char/hw_random/cavium-rng-vf.c (+99 -0)
  22. drivers/char/hw_random/cavium-rng.c (+94 -0)
  23. drivers/char/hw_random/core.c (+20 -17)
  24. drivers/char/hw_random/geode-rng.c (+19 -39)
  25. drivers/char/hw_random/meson-rng.c (+0 -3)
  26. drivers/char/hw_random/omap-rng.c (+2 -2)
  27. drivers/char/hw_random/omap3-rom-rng.c (+2 -8)
  28. drivers/char/hw_random/pasemi-rng.c (+7 -30)
  29. drivers/char/hw_random/pic32-rng.c (+0 -1)
  30. drivers/char/hw_random/st-rng.c (+1 -3)
  31. drivers/char/hw_random/tx4939-rng.c (+1 -10)
  32. drivers/crypto/Kconfig (+3 -0)
  33. drivers/crypto/caam/caamalg.c (+101 -60)
  34. drivers/crypto/caam/caamhash.c (+313 -268)
  35. drivers/crypto/caam/ctrl.c (+3 -0)
  36. drivers/crypto/caam/desc.h (+0 -6)
  37. drivers/crypto/caam/desc_constr.h (+17 -0)
  38. drivers/crypto/caam/intern.h (+0 -1)
  39. drivers/crypto/caam/jr.c (+10 -16)
  40. drivers/crypto/caam/regs.h (+8 -0)
  41. drivers/crypto/caam/sg_sw_sec4.h (+1 -1)
  42. drivers/crypto/ccp/Makefile (+1 -0)
  43. drivers/crypto/ccp/ccp-crypto-sha.c (+17 -1)
  44. drivers/crypto/ccp/ccp-dev-v3.c (+108 -74)
  45. drivers/crypto/ccp/ccp-dev-v5.c (+1017 -0)
  46. drivers/crypto/ccp/ccp-dev.c (+109 -4)
  47. drivers/crypto/ccp/ccp-dev.h (+246 -66)
  48. drivers/crypto/ccp/ccp-dmaengine.c (+6 -5)
  49. drivers/crypto/ccp/ccp-ops.c (+305 -271)
  50. drivers/crypto/ccp/ccp-pci.c (+14 -9)
  51. drivers/crypto/hifn_795x.c (+2 -10)
  52. drivers/crypto/img-hash.c (+98 -10)
  53. drivers/crypto/ixp4xx_crypto.c (+5 -4)
  54. drivers/crypto/marvell/cesa.c (+1 -0)
  55. drivers/crypto/marvell/hash.c (+21 -23)
  56. drivers/crypto/marvell/tdma.c (+1 -0)
  57. drivers/crypto/mv_cesa.c (+2 -5)
  58. drivers/crypto/mxc-scc.c (+3 -1)
  59. drivers/crypto/omap-aes.c (+86 -55)
  60. drivers/crypto/omap-des.c (+20 -15)
  61. drivers/crypto/omap-sham.c (+342 -226)
  62. drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h (+1 -1)
  63. drivers/crypto/qat/qat_common/adf_admin.c (+12 -8)
  64. drivers/crypto/qat/qat_common/qat_uclo.c (+4 -4)
  65. drivers/crypto/rockchip/rk3288_crypto.c (+2 -4)
  66. drivers/crypto/sunxi-ss/sun4i-ss-cipher.c (+4 -2)
  67. drivers/crypto/sunxi-ss/sun4i-ss-core.c (+34 -34)
  68. drivers/crypto/sunxi-ss/sun4i-ss-hash.c (+97 -68)
  69. drivers/crypto/sunxi-ss/sun4i-ss.h (+1 -1)
  70. drivers/crypto/vmx/Kconfig (+1 -0)
  71. drivers/crypto/vmx/ghash.c (+16 -15)
  72. drivers/pci/quirks.c (+11 -0)
  73. include/crypto/algapi.h (+0 -70)
  74. include/crypto/engine.h (+107 -0)
  75. include/crypto/ghash.h (+23 -0)
  76. include/linux/ccp.h (+0 -3)
  77. include/linux/hw_random.h (+3 -1)

+ 26 - 12
Documentation/DocBook/crypto-API.tmpl

@@ -797,7 +797,8 @@ kernel crypto API            |       Caller
      include/linux/crypto.h and their definition can be seen below.
      include/linux/crypto.h and their definition can be seen below.
      The former function registers a single transformation, while
      The former function registers a single transformation, while
      the latter works on an array of transformation descriptions.
      the latter works on an array of transformation descriptions.
-     The latter is useful when registering transformations in bulk.
+     The latter is useful when registering transformations in bulk,
+     for example when a driver implements multiple transformations.
     </para>
     </para>
 
 
     <programlisting>
     <programlisting>
@@ -822,18 +823,31 @@ kernel crypto API            |       Caller
     </para>
     </para>
 
 
     <para>
     <para>
-     The bulk registration / unregistration functions require
-     that struct crypto_alg is an array of count size. These
-     functions simply loop over that array and register /
-     unregister each individual algorithm. If an error occurs,
-     the loop is terminated at the offending algorithm definition.
-     That means, the algorithms prior to the offending algorithm
-     are successfully registered. Note, the caller has no way of
-     knowing which cipher implementations have successfully
-     registered. If this is important to know, the caller should
-     loop through the different implementations using the single
-     instance *_alg functions for each individual implementation.
+     The bulk registration/unregistration functions
+     register/unregister each transformation in the given array of
+     length count.  They handle errors as follows:
     </para>
     </para>
+    <itemizedlist>
+     <listitem>
+      <para>
+       crypto_register_algs() succeeds if and only if it
+       successfully registers all the given transformations. If an
+       error occurs partway through, then it rolls back successful
+       registrations before returning the error code. Note that if
+       a driver needs to handle registration errors for individual
+       transformations, then it will need to use the non-bulk
+       function crypto_register_alg() instead.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       crypto_unregister_algs() tries to unregister all the given
+       transformations, continuing on error. It logs errors and
+       always returns zero.
+      </para>
+     </listitem>
+    </itemizedlist>
+
    </sect1>
    </sect1>
 
 
    <sect1><title>Single-Block Symmetric Ciphers [CIPHER]</title>
    <sect1><title>Single-Block Symmetric Ciphers [CIPHER]</title>
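
The documentation text above describes the all-or-nothing semantics of crypto_register_algs() and the best-effort semantics of crypto_unregister_algs(). A minimal sketch of a driver using the bulk interface follows; the my_* names and the partially filled algorithm entries are illustrative only, not taken from this patch.

#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* Sketch only: the entries below would need their remaining fields filled in. */
static struct crypto_alg my_algs[] = {
	{
		.cra_name	 = "ecb(aes)",
		.cra_driver_name = "ecb-aes-mydriver",
		.cra_priority	 = 300,
		/* ... blocksize, ctxsize, type-specific ops elided ... */
	},
	{
		.cra_name	 = "cbc(aes)",
		.cra_driver_name = "cbc-aes-mydriver",
		.cra_priority	 = 300,
		/* ... remaining fields elided ... */
	},
};

static int __init my_mod_init(void)
{
	/* Either every entry registers or none does: rollback on failure. */
	return crypto_register_algs(my_algs, ARRAY_SIZE(my_algs));
}

static void __exit my_mod_exit(void)
{
	/* Logs per-entry errors and keeps going; effectively always succeeds. */
	crypto_unregister_algs(my_algs, ARRAY_SIZE(my_algs));
}

module_init(my_mod_init);
module_exit(my_mod_exit);
MODULE_LICENSE("GPL");
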

+ 25 - 1
arch/arm/crypto/ghash-ce-glue.c

@@ -138,7 +138,7 @@ static struct shash_alg ghash_alg = {
 	.setkey			= ghash_setkey,
 	.setkey			= ghash_setkey,
 	.descsize		= sizeof(struct ghash_desc_ctx),
 	.descsize		= sizeof(struct ghash_desc_ctx),
 	.base			= {
 	.base			= {
-		.cra_name	= "ghash",
+		.cra_name	= "__ghash",
 		.cra_driver_name = "__driver-ghash-ce",
 		.cra_driver_name = "__driver-ghash-ce",
 		.cra_priority	= 0,
 		.cra_priority	= 0,
 		.cra_flags	= CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
 		.cra_flags	= CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
@@ -220,6 +220,27 @@ static int ghash_async_digest(struct ahash_request *req)
 	}
 	}
 }
 }
 
 
+static int ghash_async_import(struct ahash_request *req, const void *in)
+{
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+
+	desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
+	desc->flags = req->base.flags;
+
+	return crypto_shash_import(desc, in);
+}
+
+static int ghash_async_export(struct ahash_request *req, void *out)
+{
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+
+	return crypto_shash_export(desc, out);
+}
+
 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
 			      unsigned int keylen)
 			      unsigned int keylen)
 {
 {
@@ -268,7 +289,10 @@ static struct ahash_alg ghash_async_alg = {
 	.final			= ghash_async_final,
 	.final			= ghash_async_final,
 	.setkey			= ghash_async_setkey,
 	.setkey			= ghash_async_setkey,
 	.digest			= ghash_async_digest,
 	.digest			= ghash_async_digest,
+	.import			= ghash_async_import,
+	.export			= ghash_async_export,
 	.halg.digestsize	= GHASH_DIGEST_SIZE,
 	.halg.digestsize	= GHASH_DIGEST_SIZE,
+	.halg.statesize		= sizeof(struct ghash_desc_ctx),
 	.halg.base		= {
 	.halg.base		= {
 		.cra_name	= "ghash",
 		.cra_name	= "ghash",
 		.cra_driver_name = "ghash-ce",
 		.cra_driver_name = "ghash-ce",

+ 0 - 1
arch/arm/crypto/sha1-armv7-neon.S

@@ -12,7 +12,6 @@
 #include <asm/assembler.h>
 #include <asm/assembler.h>
 
 
 .syntax unified
 .syntax unified
-.code   32
 .fpu neon
 .fpu neon
 
 
 .text
 .text

+ 11 - 2
arch/powerpc/crypto/sha1-powerpc-asm.S

@@ -7,6 +7,15 @@
 #include <asm/ppc_asm.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/asm-offsets.h>
 
 
+#ifdef __BIG_ENDIAN__
+#define LWZ(rt, d, ra)	\
+	lwz	rt,d(ra)
+#else
+#define LWZ(rt, d, ra)	\
+	li	rt,d;	\
+	lwbrx	rt,rt,ra
+#endif
+
 /*
 /*
  * We roll the registers for T, A, B, C, D, E around on each
  * We roll the registers for T, A, B, C, D, E around on each
  * iteration; T on iteration t is A on iteration t+1, and so on.
  * iteration; T on iteration t is A on iteration t+1, and so on.
@@ -23,7 +32,7 @@
 #define W(t)	(((t)%16)+16)
 #define W(t)	(((t)%16)+16)
 
 
 #define LOADW(t)				\
 #define LOADW(t)				\
-	lwz	W(t),(t)*4(r4)
+	LWZ(W(t),(t)*4,r4)
 
 
 #define STEPD0_LOAD(t)				\
 #define STEPD0_LOAD(t)				\
 	andc	r0,RD(t),RB(t);		\
 	andc	r0,RD(t),RB(t);		\
@@ -33,7 +42,7 @@
 	add	r0,RE(t),r15;			\
 	add	r0,RE(t),r15;			\
 	add	RT(t),RT(t),r6;		\
 	add	RT(t),RT(t),r6;		\
 	add	r14,r0,W(t);			\
 	add	r14,r0,W(t);			\
-	lwz	W((t)+4),((t)+4)*4(r4);	\
+	LWZ(W((t)+4),((t)+4)*4,r4);	\
 	rotlwi	RB(t),RB(t),30;			\
 	rotlwi	RB(t),RB(t),30;			\
 	add	RT(t),RT(t),r14
 	add	RT(t),RT(t),r14
 
 

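On big-endian the LWZ macro above is a plain lwz, while on little-endian it uses lwbrx so the byte-reversed load still yields the big-endian message word SHA-1 expects. As a rough C-level analogue (illustration only, not part of this patch), the effect is the same as an unaligned big-endian load:

#include <asm/unaligned.h>
#include <linux/types.h>

/*
 * Rough C analogue of LWZ(W(t), (t)*4, r4): read message word t as a
 * big-endian 32-bit value regardless of CPU endianness.
 */
static inline u32 sha1_load_word(const u8 *src, int t)
{
	return get_unaligned_be32(src + 4 * t);
}
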
+ 61 - 12
crypto/algif_hash.c

@@ -39,6 +39,37 @@ struct algif_hash_tfm {
 	bool has_key;
 	bool has_key;
 };
 };
 
 
+static int hash_alloc_result(struct sock *sk, struct hash_ctx *ctx)
+{
+	unsigned ds;
+
+	if (ctx->result)
+		return 0;
+
+	ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+
+	ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
+	if (!ctx->result)
+		return -ENOMEM;
+
+	memset(ctx->result, 0, ds);
+
+	return 0;
+}
+
+static void hash_free_result(struct sock *sk, struct hash_ctx *ctx)
+{
+	unsigned ds;
+
+	if (!ctx->result)
+		return;
+
+	ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+
+	sock_kzfree_s(sk, ctx->result, ds);
+	ctx->result = NULL;
+}
+
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 			size_t ignored)
 			size_t ignored)
 {
 {
@@ -54,6 +85,9 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
 
 	lock_sock(sk);
 	lock_sock(sk);
 	if (!ctx->more) {
 	if (!ctx->more) {
+		if ((msg->msg_flags & MSG_MORE))
+			hash_free_result(sk, ctx);
+
 		err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
 		err = af_alg_wait_for_completion(crypto_ahash_init(&ctx->req),
 						&ctx->completion);
 						&ctx->completion);
 		if (err)
 		if (err)
@@ -90,6 +124,10 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 
 
 	ctx->more = msg->msg_flags & MSG_MORE;
 	ctx->more = msg->msg_flags & MSG_MORE;
 	if (!ctx->more) {
 	if (!ctx->more) {
+		err = hash_alloc_result(sk, ctx);
+		if (err)
+			goto unlock;
+
 		ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
 		ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
 		err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
 		err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
 						 &ctx->completion);
 						 &ctx->completion);
@@ -116,6 +154,13 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
 	sg_init_table(ctx->sgl.sg, 1);
 	sg_init_table(ctx->sgl.sg, 1);
 	sg_set_page(ctx->sgl.sg, page, size, offset);
 	sg_set_page(ctx->sgl.sg, page, size, offset);
 
 
+	if (!(flags & MSG_MORE)) {
+		err = hash_alloc_result(sk, ctx);
+		if (err)
+			goto unlock;
+	} else if (!ctx->more)
+		hash_free_result(sk, ctx);
+
 	ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
 	ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
 
 
 	if (!(flags & MSG_MORE)) {
 	if (!(flags & MSG_MORE)) {
@@ -153,6 +198,7 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
 	struct hash_ctx *ctx = ask->private;
 	struct hash_ctx *ctx = ask->private;
 	unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
 	unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req));
+	bool result;
 	int err;
 	int err;
 
 
 	if (len > ds)
 	if (len > ds)
@@ -161,17 +207,29 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		msg->msg_flags |= MSG_TRUNC;
 		msg->msg_flags |= MSG_TRUNC;
 
 
 	lock_sock(sk);
 	lock_sock(sk);
+	result = ctx->result;
+	err = hash_alloc_result(sk, ctx);
+	if (err)
+		goto unlock;
+
+	ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
+
 	if (ctx->more) {
 	if (ctx->more) {
 		ctx->more = 0;
 		ctx->more = 0;
-		ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
 		err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
 		err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
 						 &ctx->completion);
 						 &ctx->completion);
 		if (err)
 		if (err)
 			goto unlock;
 			goto unlock;
+	} else if (!result) {
+		err = af_alg_wait_for_completion(
+				crypto_ahash_digest(&ctx->req),
+				&ctx->completion);
 	}
 	}
 
 
 	err = memcpy_to_msg(msg, ctx->result, len);
 	err = memcpy_to_msg(msg, ctx->result, len);
 
 
+	hash_free_result(sk, ctx);
+
 unlock:
 unlock:
 	release_sock(sk);
 	release_sock(sk);
 
 
@@ -394,8 +452,7 @@ static void hash_sock_destruct(struct sock *sk)
 	struct alg_sock *ask = alg_sk(sk);
 	struct alg_sock *ask = alg_sk(sk);
 	struct hash_ctx *ctx = ask->private;
 	struct hash_ctx *ctx = ask->private;
 
 
-	sock_kzfree_s(sk, ctx->result,
-		      crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)));
+	hash_free_result(sk, ctx);
 	sock_kfree_s(sk, ctx, ctx->len);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
 	af_alg_release_parent(sk);
 }
 }
@@ -407,20 +464,12 @@ static int hash_accept_parent_nokey(void *private, struct sock *sk)
 	struct algif_hash_tfm *tfm = private;
 	struct algif_hash_tfm *tfm = private;
 	struct crypto_ahash *hash = tfm->hash;
 	struct crypto_ahash *hash = tfm->hash;
 	unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
 	unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
-	unsigned ds = crypto_ahash_digestsize(hash);
 
 
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	ctx = sock_kmalloc(sk, len, GFP_KERNEL);
 	if (!ctx)
 	if (!ctx)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL);
-	if (!ctx->result) {
-		sock_kfree_s(sk, ctx, len);
-		return -ENOMEM;
-	}
-
-	memset(ctx->result, 0, ds);
-
+	ctx->result = NULL;
 	ctx->len = len;
 	ctx->len = len;
 	ctx->more = 0;
 	ctx->more = 0;
 	af_alg_init_completion(&ctx->completion);
 	af_alg_init_completion(&ctx->completion);
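
The algif_hash.c changes above defer allocation of the result buffer until a digest is actually produced. For context, here is a minimal illustrative userspace sketch of the AF_ALG "hash" socket interface this file implements (error handling omitted; not part of the patch):

#include <linux/if_alg.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha256",
	};
	unsigned char digest[32];
	const char msg[] = "abc";
	int tfmfd, opfd, i;

	tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa));
	opfd = accept(tfmfd, NULL, 0);		/* one hash operation instance */

	send(opfd, msg, strlen(msg), 0);	/* handled by hash_sendmsg() */
	read(opfd, digest, sizeof(digest));	/* handled by hash_recvmsg() */

	for (i = 0; i < (int)sizeof(digest); i++)
		printf("%02x", digest[i]);
	printf("\n");

	close(opfd);
	close(tfmfd);
	return 0;
}
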

+ 1 - 4
crypto/crct10dif_generic.c

@@ -107,10 +107,7 @@ static struct shash_alg alg = {
 
 
 static int __init crct10dif_mod_init(void)
 static int __init crct10dif_mod_init(void)
 {
 {
-	int ret;
-
-	ret = crypto_register_shash(&alg);
-	return ret;
+	return crypto_register_shash(&alg);
 }
 }
 
 
 static void __exit crct10dif_mod_fini(void)
 static void __exit crct10dif_mod_fini(void)

+ 149 - 38
crypto/crypto_engine.c

@@ -14,13 +14,12 @@
 
 
 #include <linux/err.h>
 #include <linux/err.h>
 #include <linux/delay.h>
 #include <linux/delay.h>
+#include <crypto/engine.h>
+#include <crypto/internal/hash.h>
 #include "internal.h"
 #include "internal.h"
 
 
 #define CRYPTO_ENGINE_MAX_QLEN 10
 #define CRYPTO_ENGINE_MAX_QLEN 10
 
 
-void crypto_finalize_request(struct crypto_engine *engine,
-			     struct ablkcipher_request *req, int err);
-
 /**
 /**
  * crypto_pump_requests - dequeue one request from engine queue to process
  * crypto_pump_requests - dequeue one request from engine queue to process
  * @engine: the hardware engine
  * @engine: the hardware engine
@@ -34,10 +33,11 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 				 bool in_kthread)
 				 bool in_kthread)
 {
 {
 	struct crypto_async_request *async_req, *backlog;
 	struct crypto_async_request *async_req, *backlog;
-	struct ablkcipher_request *req;
+	struct ahash_request *hreq;
+	struct ablkcipher_request *breq;
 	unsigned long flags;
 	unsigned long flags;
 	bool was_busy = false;
 	bool was_busy = false;
-	int ret;
+	int ret, rtype;
 
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 	spin_lock_irqsave(&engine->queue_lock, flags);
 
 
@@ -82,9 +82,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 	if (!async_req)
 	if (!async_req)
 		goto out;
 		goto out;
 
 
-	req = ablkcipher_request_cast(async_req);
-
-	engine->cur_req = req;
+	engine->cur_req = async_req;
 	if (backlog)
 	if (backlog)
 		backlog->complete(backlog, -EINPROGRESS);
 		backlog->complete(backlog, -EINPROGRESS);
 
 
@@ -95,6 +93,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
 
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
 
+	rtype = crypto_tfm_alg_type(engine->cur_req->tfm);
 	/* Until here we get the request need to be encrypted successfully */
 	/* Until here we get the request need to be encrypted successfully */
 	if (!was_busy && engine->prepare_crypt_hardware) {
 	if (!was_busy && engine->prepare_crypt_hardware) {
 		ret = engine->prepare_crypt_hardware(engine);
 		ret = engine->prepare_crypt_hardware(engine);
@@ -104,24 +103,55 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 		}
 		}
 	}
 	}
 
 
-	if (engine->prepare_request) {
-		ret = engine->prepare_request(engine, engine->cur_req);
+	switch (rtype) {
+	case CRYPTO_ALG_TYPE_AHASH:
+		hreq = ahash_request_cast(engine->cur_req);
+		if (engine->prepare_hash_request) {
+			ret = engine->prepare_hash_request(engine, hreq);
+			if (ret) {
+				pr_err("failed to prepare request: %d\n", ret);
+				goto req_err;
+			}
+			engine->cur_req_prepared = true;
+		}
+		ret = engine->hash_one_request(engine, hreq);
 		if (ret) {
 		if (ret) {
-			pr_err("failed to prepare request: %d\n", ret);
+			pr_err("failed to hash one request from queue\n");
 			goto req_err;
 			goto req_err;
 		}
 		}
-		engine->cur_req_prepared = true;
-	}
-
-	ret = engine->crypt_one_request(engine, engine->cur_req);
-	if (ret) {
-		pr_err("failed to crypt one request from queue\n");
-		goto req_err;
+		return;
+	case CRYPTO_ALG_TYPE_ABLKCIPHER:
+		breq = ablkcipher_request_cast(engine->cur_req);
+		if (engine->prepare_cipher_request) {
+			ret = engine->prepare_cipher_request(engine, breq);
+			if (ret) {
+				pr_err("failed to prepare request: %d\n", ret);
+				goto req_err;
+			}
+			engine->cur_req_prepared = true;
+		}
+		ret = engine->cipher_one_request(engine, breq);
+		if (ret) {
+			pr_err("failed to cipher one request from queue\n");
+			goto req_err;
+		}
+		return;
+	default:
+		pr_err("failed to prepare request of unknown type\n");
+		return;
 	}
 	}
-	return;
 
 
 req_err:
 req_err:
-	crypto_finalize_request(engine, engine->cur_req, ret);
+	switch (rtype) {
+	case CRYPTO_ALG_TYPE_AHASH:
+		hreq = ahash_request_cast(engine->cur_req);
+		crypto_finalize_hash_request(engine, hreq, ret);
+		break;
+	case CRYPTO_ALG_TYPE_ABLKCIPHER:
+		breq = ablkcipher_request_cast(engine->cur_req);
+		crypto_finalize_cipher_request(engine, breq, ret);
+		break;
+	}
 	return;
 	return;
 
 
 out:
 out:
@@ -137,12 +167,14 @@ static void crypto_pump_work(struct kthread_work *work)
 }
 }
 
 
 /**
 /**
- * crypto_transfer_request - transfer the new request into the engine queue
+ * crypto_transfer_cipher_request - transfer the new request into the
+ * enginequeue
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  * @req: the request need to be listed into the engine queue
  */
  */
-int crypto_transfer_request(struct crypto_engine *engine,
-			    struct ablkcipher_request *req, bool need_pump)
+int crypto_transfer_cipher_request(struct crypto_engine *engine,
+				   struct ablkcipher_request *req,
+				   bool need_pump)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 	int ret;
 	int ret;
@@ -162,46 +194,125 @@ int crypto_transfer_request(struct crypto_engine *engine,
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	return ret;
 	return ret;
 }
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_request);
+EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request);
+
+/**
+ * crypto_transfer_cipher_request_to_engine - transfer one request to list
+ * into the engine queue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
+					     struct ablkcipher_request *req)
+{
+	return crypto_transfer_cipher_request(engine, req, true);
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_cipher_request_to_engine);
+
+/**
+ * crypto_transfer_hash_request - transfer the new request into the
+ * enginequeue
+ * @engine: the hardware engine
+ * @req: the request need to be listed into the engine queue
+ */
+int crypto_transfer_hash_request(struct crypto_engine *engine,
+				 struct ahash_request *req, bool need_pump)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&engine->queue_lock, flags);
+
+	if (!engine->running) {
+		spin_unlock_irqrestore(&engine->queue_lock, flags);
+		return -ESHUTDOWN;
+	}
+
+	ret = ahash_enqueue_request(&engine->queue, req);
+
+	if (!engine->busy && need_pump)
+		queue_kthread_work(&engine->kworker, &engine->pump_requests);
+
+	spin_unlock_irqrestore(&engine->queue_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_transfer_hash_request);
 
 
 /**
 /**
- * crypto_transfer_request_to_engine - transfer one request to list into the
- * engine queue
+ * crypto_transfer_hash_request_to_engine - transfer one request to list
+ * into the engine queue
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
  * @req: the request need to be listed into the engine queue
  */
  */
-int crypto_transfer_request_to_engine(struct crypto_engine *engine,
-				      struct ablkcipher_request *req)
+int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
+					   struct ahash_request *req)
 {
 {
-	return crypto_transfer_request(engine, req, true);
+	return crypto_transfer_hash_request(engine, req, true);
 }
 }
-EXPORT_SYMBOL_GPL(crypto_transfer_request_to_engine);
+EXPORT_SYMBOL_GPL(crypto_transfer_hash_request_to_engine);
 
 
 /**
 /**
- * crypto_finalize_request - finalize one request if the request is done
+ * crypto_finalize_cipher_request - finalize one request if the request is done
  * @engine: the hardware engine
  * @engine: the hardware engine
  * @req: the request need to be finalized
  * @req: the request need to be finalized
  * @err: error number
  * @err: error number
  */
  */
-void crypto_finalize_request(struct crypto_engine *engine,
-			     struct ablkcipher_request *req, int err)
+void crypto_finalize_cipher_request(struct crypto_engine *engine,
+				    struct ablkcipher_request *req, int err)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 	bool finalize_cur_req = false;
 	bool finalize_cur_req = false;
 	int ret;
 	int ret;
 
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 	spin_lock_irqsave(&engine->queue_lock, flags);
-	if (engine->cur_req == req)
+	if (engine->cur_req == &req->base)
 		finalize_cur_req = true;
 		finalize_cur_req = true;
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 	spin_unlock_irqrestore(&engine->queue_lock, flags);
 
 
 	if (finalize_cur_req) {
 	if (finalize_cur_req) {
-		if (engine->cur_req_prepared && engine->unprepare_request) {
-			ret = engine->unprepare_request(engine, req);
+		if (engine->cur_req_prepared &&
+		    engine->unprepare_cipher_request) {
+			ret = engine->unprepare_cipher_request(engine, req);
 			if (ret)
 			if (ret)
 				pr_err("failed to unprepare request\n");
 				pr_err("failed to unprepare request\n");
 		}
 		}
+		spin_lock_irqsave(&engine->queue_lock, flags);
+		engine->cur_req = NULL;
+		engine->cur_req_prepared = false;
+		spin_unlock_irqrestore(&engine->queue_lock, flags);
+	}
+
+	req->base.complete(&req->base, err);
 
 
+	queue_kthread_work(&engine->kworker, &engine->pump_requests);
+}
+EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
+
+/**
+ * crypto_finalize_hash_request - finalize one request if the request is done
+ * @engine: the hardware engine
+ * @req: the request need to be finalized
+ * @err: error number
+ */
+void crypto_finalize_hash_request(struct crypto_engine *engine,
+				  struct ahash_request *req, int err)
+{
+	unsigned long flags;
+	bool finalize_cur_req = false;
+	int ret;
+
+	spin_lock_irqsave(&engine->queue_lock, flags);
+	if (engine->cur_req == &req->base)
+		finalize_cur_req = true;
+	spin_unlock_irqrestore(&engine->queue_lock, flags);
+
+	if (finalize_cur_req) {
+		if (engine->cur_req_prepared &&
+		    engine->unprepare_hash_request) {
+			ret = engine->unprepare_hash_request(engine, req);
+			if (ret)
+				pr_err("failed to unprepare request\n");
+		}
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		spin_lock_irqsave(&engine->queue_lock, flags);
 		engine->cur_req = NULL;
 		engine->cur_req = NULL;
 		engine->cur_req_prepared = false;
 		engine->cur_req_prepared = false;
@@ -212,7 +323,7 @@ void crypto_finalize_request(struct crypto_engine *engine,
 
 
 	queue_kthread_work(&engine->kworker, &engine->pump_requests);
 	queue_kthread_work(&engine->kworker, &engine->pump_requests);
 }
 }
-EXPORT_SYMBOL_GPL(crypto_finalize_request);
+EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
 
 /**
 /**
  * crypto_engine_start - start the hardware engine
  * crypto_engine_start - start the hardware engine
@@ -249,7 +360,7 @@ EXPORT_SYMBOL_GPL(crypto_engine_start);
 int crypto_engine_stop(struct crypto_engine *engine)
 int crypto_engine_stop(struct crypto_engine *engine)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
-	unsigned limit = 500;
+	unsigned int limit = 500;
 	int ret = 0;
 	int ret = 0;
 
 
 	spin_lock_irqsave(&engine->queue_lock, flags);
 	spin_lock_irqsave(&engine->queue_lock, flags);
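
With the changes above the crypto engine can queue and finalize ahash requests alongside ablkcipher requests. Below is a hedged sketch of how a hash driver might hook into the new interface; only the engine fields and the crypto_transfer_hash_request_to_engine()/crypto_finalize_hash_request() helpers come from this patch, while the my_* names and per-device structure are hypothetical.

#include <crypto/engine.h>
#include <crypto/internal/hash.h>

/* Hypothetical per-device state (sketch only). */
struct my_dev {
	struct crypto_engine *engine;	/* from crypto_engine_alloc_init() */
	struct ahash_request *cur_req;
};

static struct my_dev *my_dd;

static int my_hash_one_request(struct crypto_engine *engine,
			       struct ahash_request *req)
{
	/* Program the hardware here; completion arrives later via IRQ. */
	return 0;
}

static int my_engine_setup(struct my_dev *dd)
{
	/* Wire up only the hash hook; prepare/unprepare hooks are optional. */
	dd->engine->hash_one_request = my_hash_one_request;
	return crypto_engine_start(dd->engine);
}

static int my_ahash_digest(struct ahash_request *req)
{
	/* Queue the request; the engine kthread calls hash_one_request(). */
	return crypto_transfer_hash_request_to_engine(my_dd->engine, req);
}

static void my_hash_irq_done(struct my_dev *dd, int err)
{
	/* Hand the finished request back to the crypto API. */
	crypto_finalize_hash_request(dd->engine, dd->cur_req, err);
}
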

+ 15 - 16
crypto/drbg.c

@@ -1178,12 +1178,16 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 		goto err;
 		goto err;
 
 
 	drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
 	drbg->Vbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
-	if (!drbg->Vbuf)
+	if (!drbg->Vbuf) {
+		ret = -ENOMEM;
 		goto fini;
 		goto fini;
+	}
 	drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1);
 	drbg->V = PTR_ALIGN(drbg->Vbuf, ret + 1);
 	drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
 	drbg->Cbuf = kmalloc(drbg_statelen(drbg) + ret, GFP_KERNEL);
-	if (!drbg->Cbuf)
+	if (!drbg->Cbuf) {
+		ret = -ENOMEM;
 		goto fini;
 		goto fini;
+	}
 	drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1);
 	drbg->C = PTR_ALIGN(drbg->Cbuf, ret + 1);
 	/* scratchpad is only generated for CTR and Hash */
 	/* scratchpad is only generated for CTR and Hash */
 	if (drbg->core->flags & DRBG_HMAC)
 	if (drbg->core->flags & DRBG_HMAC)
@@ -1199,8 +1203,10 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 
 
 	if (0 < sb_size) {
 	if (0 < sb_size) {
 		drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL);
 		drbg->scratchpadbuf = kzalloc(sb_size + ret, GFP_KERNEL);
-		if (!drbg->scratchpadbuf)
+		if (!drbg->scratchpadbuf) {
+			ret = -ENOMEM;
 			goto fini;
 			goto fini;
+		}
 		drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1);
 		drbg->scratchpad = PTR_ALIGN(drbg->scratchpadbuf, ret + 1);
 	}
 	}
 
 
@@ -1917,6 +1923,8 @@ static inline int __init drbg_healthcheck_sanity(void)
 		return -ENOMEM;
 		return -ENOMEM;
 
 
 	mutex_init(&drbg->drbg_mutex);
 	mutex_init(&drbg->drbg_mutex);
+	drbg->core = &drbg_cores[coreref];
+	drbg->reseed_threshold = drbg_max_requests(drbg);
 
 
 	/*
 	/*
 	 * if the following tests fail, it is likely that there is a buffer
 	 * if the following tests fail, it is likely that there is a buffer
@@ -1926,12 +1934,6 @@ static inline int __init drbg_healthcheck_sanity(void)
 	 * grave bug.
 	 * grave bug.
 	 */
 	 */
 
 
-	/* get a valid instance of DRBG for following tests */
-	ret = drbg_instantiate(drbg, NULL, coreref, pr);
-	if (ret) {
-		rc = ret;
-		goto outbuf;
-	}
 	max_addtllen = drbg_max_addtl(drbg);
 	max_addtllen = drbg_max_addtl(drbg);
 	max_request_bytes = drbg_max_request_bytes(drbg);
 	max_request_bytes = drbg_max_request_bytes(drbg);
 	drbg_string_fill(&addtl, buf, max_addtllen + 1);
 	drbg_string_fill(&addtl, buf, max_addtllen + 1);
@@ -1941,10 +1943,9 @@ static inline int __init drbg_healthcheck_sanity(void)
 	/* overflow max_bits */
 	/* overflow max_bits */
 	len = drbg_generate(drbg, buf, (max_request_bytes + 1), NULL);
 	len = drbg_generate(drbg, buf, (max_request_bytes + 1), NULL);
 	BUG_ON(0 < len);
 	BUG_ON(0 < len);
-	drbg_uninstantiate(drbg);
 
 
 	/* overflow max addtllen with personalization string */
 	/* overflow max addtllen with personalization string */
-	ret = drbg_instantiate(drbg, &addtl, coreref, pr);
+	ret = drbg_seed(drbg, &addtl, false);
 	BUG_ON(0 == ret);
 	BUG_ON(0 == ret);
 	/* all tests passed */
 	/* all tests passed */
 	rc = 0;
 	rc = 0;
@@ -1952,9 +1953,7 @@ static inline int __init drbg_healthcheck_sanity(void)
 	pr_devel("DRBG: Sanity tests for failure code paths successfully "
 	pr_devel("DRBG: Sanity tests for failure code paths successfully "
 		 "completed\n");
 		 "completed\n");
 
 
-	drbg_uninstantiate(drbg);
-outbuf:
-	kzfree(drbg);
+	kfree(drbg);
 	return rc;
 	return rc;
 }
 }
 
 
@@ -2006,7 +2005,7 @@ static int __init drbg_init(void)
 {
 {
 	unsigned int i = 0; /* pointer to drbg_algs */
 	unsigned int i = 0; /* pointer to drbg_algs */
 	unsigned int j = 0; /* pointer to drbg_cores */
 	unsigned int j = 0; /* pointer to drbg_cores */
-	int ret = -EFAULT;
+	int ret;
 
 
 	ret = drbg_healthcheck_sanity();
 	ret = drbg_healthcheck_sanity();
 	if (ret)
 	if (ret)
@@ -2016,7 +2015,7 @@ static int __init drbg_init(void)
 		pr_info("DRBG: Cannot register all DRBG types"
 		pr_info("DRBG: Cannot register all DRBG types"
 			"(slots needed: %zu, slots available: %zu)\n",
 			"(slots needed: %zu, slots available: %zu)\n",
 			ARRAY_SIZE(drbg_cores) * 2, ARRAY_SIZE(drbg_algs));
 			ARRAY_SIZE(drbg_cores) * 2, ARRAY_SIZE(drbg_algs));
-		return ret;
+		return -EFAULT;
 	}
 	}
 
 
 	/*
 	/*

+ 1 - 1
crypto/gcm.c

@@ -117,7 +117,7 @@ static int crypto_gcm_setkey(struct crypto_aead *aead, const u8 *key,
 	struct crypto_skcipher *ctr = ctx->ctr;
 	struct crypto_skcipher *ctr = ctx->ctr;
 	struct {
 	struct {
 		be128 hash;
 		be128 hash;
-		u8 iv[8];
+		u8 iv[16];
 
 
 		struct crypto_gcm_setkey_result result;
 		struct crypto_gcm_setkey_result result;
 
 

+ 1 - 12
crypto/ghash-generic.c

@@ -14,24 +14,13 @@
 
 
 #include <crypto/algapi.h>
 #include <crypto/algapi.h>
 #include <crypto/gf128mul.h>
 #include <crypto/gf128mul.h>
+#include <crypto/ghash.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/hash.h>
 #include <linux/crypto.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/module.h>
 
 
-#define GHASH_BLOCK_SIZE	16
-#define GHASH_DIGEST_SIZE	16
-
-struct ghash_ctx {
-	struct gf128mul_4k *gf128;
-};
-
-struct ghash_desc_ctx {
-	u8 buffer[GHASH_BLOCK_SIZE];
-	u32 bytes;
-};
-
 static int ghash_init(struct shash_desc *desc)
 static int ghash_init(struct shash_desc *desc)
 {
 {
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);

+ 1 - 6
crypto/mcryptd.c

@@ -612,12 +612,7 @@ EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
 
 
 int ahash_mcryptd_digest(struct ahash_request *desc)
 int ahash_mcryptd_digest(struct ahash_request *desc)
 {
 {
-	int err;
-
-	err = crypto_ahash_init(desc) ?:
-	      ahash_mcryptd_finup(desc);
-
-	return err;
+	return crypto_ahash_init(desc) ?: ahash_mcryptd_finup(desc);
 }
 }
 
 
 int ahash_mcryptd_update(struct ahash_request *desc)
 int ahash_mcryptd_update(struct ahash_request *desc)

+ 2 - 2
crypto/rsa_helper.c

@@ -35,8 +35,8 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
 			n_sz--;
 			n_sz--;
 		}
 		}
 
 
-		/* In FIPS mode only allow key size 2K & 3K */
-		if (n_sz != 256 && n_sz != 384) {
+		/* In FIPS mode only allow key size 2K and higher */
+		if (n_sz < 256) {
 			pr_err("RSA: key size not allowed in FIPS mode\n");
 			pr_err("RSA: key size not allowed in FIPS mode\n");
 			return -EINVAL;
 			return -EINVAL;
 		}
 		}

+ 18 - 6
crypto/testmgr.c

@@ -209,16 +209,19 @@ static int ahash_partial_update(struct ahash_request **preq,
 	char *state;
 	char *state;
 	struct ahash_request *req;
 	struct ahash_request *req;
 	int statesize, ret = -EINVAL;
 	int statesize, ret = -EINVAL;
+	const char guard[] = { 0x00, 0xba, 0xad, 0x00 };
 
 
 	req = *preq;
 	req = *preq;
 	statesize = crypto_ahash_statesize(
 	statesize = crypto_ahash_statesize(
 			crypto_ahash_reqtfm(req));
 			crypto_ahash_reqtfm(req));
-	state = kmalloc(statesize, GFP_KERNEL);
+	state = kmalloc(statesize + sizeof(guard), GFP_KERNEL);
 	if (!state) {
 	if (!state) {
 		pr_err("alt: hash: Failed to alloc state for %s\n", algo);
 		pr_err("alt: hash: Failed to alloc state for %s\n", algo);
 		goto out_nostate;
 		goto out_nostate;
 	}
 	}
+	memcpy(state + statesize, guard, sizeof(guard));
 	ret = crypto_ahash_export(req, state);
 	ret = crypto_ahash_export(req, state);
+	WARN_ON(memcmp(state + statesize, guard, sizeof(guard)));
 	if (ret) {
 	if (ret) {
 		pr_err("alt: hash: Failed to export() for %s\n", algo);
 		pr_err("alt: hash: Failed to export() for %s\n", algo);
 		goto out;
 		goto out;
@@ -665,7 +668,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 		memcpy(key, template[i].key, template[i].klen);
 		memcpy(key, template[i].key, template[i].klen);
 
 
 		ret = crypto_aead_setkey(tfm, key, template[i].klen);
 		ret = crypto_aead_setkey(tfm, key, template[i].klen);
-		if (!ret == template[i].fail) {
+		if (template[i].fail == !ret) {
 			pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n",
 			pr_err("alg: aead%s: setkey failed on test %d for %s: flags=%x\n",
 			       d, j, algo, crypto_aead_get_flags(tfm));
 			       d, j, algo, crypto_aead_get_flags(tfm));
 			goto out;
 			goto out;
@@ -770,7 +773,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 		memcpy(key, template[i].key, template[i].klen);
 		memcpy(key, template[i].key, template[i].klen);
 
 
 		ret = crypto_aead_setkey(tfm, key, template[i].klen);
 		ret = crypto_aead_setkey(tfm, key, template[i].klen);
-		if (!ret == template[i].fail) {
+		if (template[i].fail == !ret) {
 			pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n",
 			pr_err("alg: aead%s: setkey failed on chunk test %d for %s: flags=%x\n",
 			       d, j, algo, crypto_aead_get_flags(tfm));
 			       d, j, algo, crypto_aead_get_flags(tfm));
 			goto out;
 			goto out;
@@ -1008,6 +1011,9 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 		if (template[i].np)
 		if (template[i].np)
 			continue;
 			continue;
 
 
+		if (fips_enabled && template[i].fips_skip)
+			continue;
+
 		j++;
 		j++;
 
 
 		ret = -EINVAL;
 		ret = -EINVAL;
@@ -1023,7 +1029,7 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 
 
 		ret = crypto_cipher_setkey(tfm, template[i].key,
 		ret = crypto_cipher_setkey(tfm, template[i].key,
 					   template[i].klen);
 					   template[i].klen);
-		if (!ret == template[i].fail) {
+		if (template[i].fail == !ret) {
 			printk(KERN_ERR "alg: cipher: setkey failed "
 			printk(KERN_ERR "alg: cipher: setkey failed "
 			       "on test %d for %s: flags=%x\n", j,
 			       "on test %d for %s: flags=%x\n", j,
 			       algo, crypto_cipher_get_flags(tfm));
 			       algo, crypto_cipher_get_flags(tfm));
@@ -1112,6 +1118,9 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 		if (template[i].np && !template[i].also_non_np)
 		if (template[i].np && !template[i].also_non_np)
 			continue;
 			continue;
 
 
+		if (fips_enabled && template[i].fips_skip)
+			continue;
+
 		if (template[i].iv)
 		if (template[i].iv)
 			memcpy(iv, template[i].iv, ivsize);
 			memcpy(iv, template[i].iv, ivsize);
 		else
 		else
@@ -1133,7 +1142,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
 
 		ret = crypto_skcipher_setkey(tfm, template[i].key,
 		ret = crypto_skcipher_setkey(tfm, template[i].key,
 					     template[i].klen);
 					     template[i].klen);
-		if (!ret == template[i].fail) {
+		if (template[i].fail == !ret) {
 			pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n",
 			pr_err("alg: skcipher%s: setkey failed on test %d for %s: flags=%x\n",
 			       d, j, algo, crypto_skcipher_get_flags(tfm));
 			       d, j, algo, crypto_skcipher_get_flags(tfm));
 			goto out;
 			goto out;
@@ -1198,6 +1207,9 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 		if (!template[i].np)
 		if (!template[i].np)
 			continue;
 			continue;
 
 
+		if (fips_enabled && template[i].fips_skip)
+			continue;
+
 		if (template[i].iv)
 		if (template[i].iv)
 			memcpy(iv, template[i].iv, ivsize);
 			memcpy(iv, template[i].iv, ivsize);
 		else
 		else
@@ -1211,7 +1223,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
 
 		ret = crypto_skcipher_setkey(tfm, template[i].key,
 		ret = crypto_skcipher_setkey(tfm, template[i].key,
 					     template[i].klen);
 					     template[i].klen);
-		if (!ret == template[i].fail) {
+		if (template[i].fail == !ret) {
 			pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n",
 			pr_err("alg: skcipher%s: setkey failed on chunk test %d for %s: flags=%x\n",
 			       d, j, algo, crypto_skcipher_get_flags(tfm));
 			       d, j, algo, crypto_skcipher_get_flags(tfm));
 			goto out;
 			goto out;

+ 4 - 0
crypto/testmgr.h

@@ -59,6 +59,7 @@ struct hash_testvec {
  * @tap:	How to distribute data in @np SGs
  * @tap:	How to distribute data in @np SGs
  * @also_non_np: 	if set to 1, the test will be also done without
  * @also_non_np: 	if set to 1, the test will be also done without
  * 			splitting data in @np SGs
  * 			splitting data in @np SGs
+ * @fips_skip:	Skip the test vector in FIPS mode
  */
  */
 
 
 struct cipher_testvec {
 struct cipher_testvec {
@@ -75,6 +76,7 @@ struct cipher_testvec {
 	unsigned char klen;
 	unsigned char klen;
 	unsigned short ilen;
 	unsigned short ilen;
 	unsigned short rlen;
 	unsigned short rlen;
+	bool fips_skip;
 };
 };
 
 
 struct aead_testvec {
 struct aead_testvec {
@@ -18224,6 +18226,7 @@ static struct cipher_testvec aes_xts_enc_tv_template[] = {
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen   = 32,
 		.klen   = 32,
+		.fips_skip = 1,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input  = "\x00\x00\x00\x00\x00\x00\x00\x00"
 		.input  = "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -18566,6 +18569,7 @@ static struct cipher_testvec aes_xts_dec_tv_template[] = {
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen   = 32,
 		.klen   = 32,
+		.fips_skip = 1,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.input = "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"
 		.input = "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"

+ 20 - 21
crypto/xor.c

@@ -24,6 +24,10 @@
 #include <linux/preempt.h>
 #include <linux/preempt.h>
 #include <asm/xor.h>
 #include <asm/xor.h>
 
 
+#ifndef XOR_SELECT_TEMPLATE
+#define XOR_SELECT_TEMPLATE(x) (x)
+#endif
+
 /* The xor routines to use.  */
 /* The xor routines to use.  */
 static struct xor_block_template *active_template;
 static struct xor_block_template *active_template;
 
 
@@ -109,6 +113,15 @@ calibrate_xor_blocks(void)
 	void *b1, *b2;
 	void *b1, *b2;
 	struct xor_block_template *f, *fastest;
 	struct xor_block_template *f, *fastest;
 
 
+	fastest = XOR_SELECT_TEMPLATE(NULL);
+
+	if (fastest) {
+		printk(KERN_INFO "xor: automatically using best "
+				 "checksumming function   %-10s\n",
+		       fastest->name);
+		goto out;
+	}
+
 	/*
 	/*
 	 * Note: Since the memory is not actually used for _anything_ but to
 	 * Note: Since the memory is not actually used for _anything_ but to
 	 * test the XOR speed, we don't really want kmemcheck to warn about
 	 * test the XOR speed, we don't really want kmemcheck to warn about
@@ -126,36 +139,22 @@ calibrate_xor_blocks(void)
 	 * all the possible functions, just test the best one
 	 * all the possible functions, just test the best one
 	 */
 	 */
 
 
-	fastest = NULL;
-
-#ifdef XOR_SELECT_TEMPLATE
-		fastest = XOR_SELECT_TEMPLATE(fastest);
-#endif
-
 #define xor_speed(templ)	do_xor_speed((templ), b1, b2)
 #define xor_speed(templ)	do_xor_speed((templ), b1, b2)
 
 
-	if (fastest) {
-		printk(KERN_INFO "xor: automatically using best "
-				 "checksumming function:\n");
-		xor_speed(fastest);
-		goto out;
-	} else {
-		printk(KERN_INFO "xor: measuring software checksum speed\n");
-		XOR_TRY_TEMPLATES;
-		fastest = template_list;
-		for (f = fastest; f; f = f->next)
-			if (f->speed > fastest->speed)
-				fastest = f;
-	}
+	printk(KERN_INFO "xor: measuring software checksum speed\n");
+	XOR_TRY_TEMPLATES;
+	fastest = template_list;
+	for (f = fastest; f; f = f->next)
+		if (f->speed > fastest->speed)
+			fastest = f;
 
 
 	printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
 	printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
 	       fastest->name, fastest->speed / 1000, fastest->speed % 1000);
 	       fastest->name, fastest->speed / 1000, fastest->speed % 1000);
 
 
 #undef xor_speed
 #undef xor_speed
 
 
- out:
 	free_pages((unsigned long)b1, 2);
 	free_pages((unsigned long)b1, 2);
-
+out:
 	active_template = fastest;
 	active_template = fastest;
 	return 0;
 	return 0;
 }
 }

+ 1 - 1
crypto/xts.c

@@ -5,7 +5,7 @@
  *
  *
  * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org>
  * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org>
  *
  *
- * Based om ecb.c
+ * Based on ecb.c
  * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
  * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
  *
  *
  * This program is free software; you can redistribute it and/or modify it
  * This program is free software; you can redistribute it and/or modify it

+ 13 - 0
drivers/char/hw_random/Kconfig

@@ -410,6 +410,19 @@ config HW_RANDOM_MESON
 
 
 	  If unsure, say Y.
 	  If unsure, say Y.
 
 
+config HW_RANDOM_CAVIUM
+       tristate "Cavium ThunderX Random Number Generator support"
+       depends on HW_RANDOM && PCI && (ARM64 || (COMPILE_TEST && 64BIT))
+       default HW_RANDOM
+       ---help---
+         This driver provides kernel-side support for the Random Number
+         Generator hardware found on Cavium SoCs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called cavium_rng.
+
+         If unsure, say Y.
+
 endif # HW_RANDOM
 endif # HW_RANDOM
 
 
 config UML_RANDOM
 config UML_RANDOM

+ 1 - 0
drivers/char/hw_random/Makefile

@@ -35,3 +35,4 @@ obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
 obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
 obj-$(CONFIG_HW_RANDOM_STM32) += stm32-rng.o
 obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
 obj-$(CONFIG_HW_RANDOM_PIC32) += pic32-rng.o
 obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
 obj-$(CONFIG_HW_RANDOM_MESON) += meson-rng.o
+obj-$(CONFIG_HW_RANDOM_CAVIUM) += cavium-rng.o cavium-rng-vf.o

+ 78 - 62
drivers/char/hw_random/amd-rng.c

@@ -24,16 +24,18 @@
  * warranty of any kind, whether express or implied.
  * warranty of any kind, whether express or implied.
  */
  */
 
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/pci.h>
-#include <linux/hw_random.h>
-#include <linux/delay.h>
-#include <asm/io.h>
 
 
+#define DRV_NAME "AMD768-HWRNG"
 
 
-#define PFX	KBUILD_MODNAME ": "
-
+#define RNGDATA		0x00
+#define RNGDONE		0x04
+#define PMBASE_OFFSET	0xF0
+#define PMBASE_SIZE	8
 
 
 /*
 /*
  * Data for PCI driver interface
  * Data for PCI driver interface
@@ -50,72 +52,84 @@ static const struct pci_device_id pci_tbl[] = {
 };
 };
 MODULE_DEVICE_TABLE(pci, pci_tbl);
 MODULE_DEVICE_TABLE(pci, pci_tbl);
 
 
-static struct pci_dev *amd_pdev;
-
+struct amd768_priv {
+	void __iomem *iobase;
+	struct pci_dev *pcidev;
+};
 
 
-static int amd_rng_data_present(struct hwrng *rng, int wait)
+static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 {
 {
-	u32 pmbase = (u32)rng->priv;
-	int data, i;
-
-	for (i = 0; i < 20; i++) {
-		data = !!(inl(pmbase + 0xF4) & 1);
-		if (data || !wait)
-			break;
-		udelay(10);
+	u32 *data = buf;
+	struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
+	size_t read = 0;
+	/* We will wait at maximum one time per read */
+	int timeout = max / 4 + 1;
+
+	/*
+	 * RNG data is available when RNGDONE is set to 1
+	 * New random numbers are generated approximately 128 microseconds
+	 * after RNGDATA is read
+	 */
+	while (read < max) {
+		if (ioread32(priv->iobase + RNGDONE) == 0) {
+			if (wait) {
+				/* Delay given by datasheet */
+				usleep_range(128, 196);
+				if (timeout-- == 0)
+					return read;
+			} else {
+				return 0;
+			}
+		} else {
+			*data = ioread32(priv->iobase + RNGDATA);
+			data++;
+			read += 4;
+		}
 	}
 	}
-	return data;
-}
 
 
-static int amd_rng_data_read(struct hwrng *rng, u32 *data)
-{
-	u32 pmbase = (u32)rng->priv;
-
-	*data = inl(pmbase + 0xF0);
-
-	return 4;
+	return read;
 }
 }
 
 
 static int amd_rng_init(struct hwrng *rng)
 static int amd_rng_init(struct hwrng *rng)
 {
 {
+	struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
 	u8 rnen;
 	u8 rnen;
 
 
-	pci_read_config_byte(amd_pdev, 0x40, &rnen);
-	rnen |= (1 << 7);	/* RNG on */
-	pci_write_config_byte(amd_pdev, 0x40, rnen);
+	pci_read_config_byte(priv->pcidev, 0x40, &rnen);
+	rnen |= BIT(7);	/* RNG on */
+	pci_write_config_byte(priv->pcidev, 0x40, rnen);
 
 
-	pci_read_config_byte(amd_pdev, 0x41, &rnen);
-	rnen |= (1 << 7);	/* PMIO enable */
-	pci_write_config_byte(amd_pdev, 0x41, rnen);
+	pci_read_config_byte(priv->pcidev, 0x41, &rnen);
+	rnen |= BIT(7);	/* PMIO enable */
+	pci_write_config_byte(priv->pcidev, 0x41, rnen);
 
 
 	return 0;
 	return 0;
 }
 }
 
 
 static void amd_rng_cleanup(struct hwrng *rng)
 static void amd_rng_cleanup(struct hwrng *rng)
 {
 {
+	struct amd768_priv *priv = (struct amd768_priv *)rng->priv;
 	u8 rnen;
 	u8 rnen;
 
 
-	pci_read_config_byte(amd_pdev, 0x40, &rnen);
-	rnen &= ~(1 << 7);	/* RNG off */
-	pci_write_config_byte(amd_pdev, 0x40, rnen);
+	pci_read_config_byte(priv->pcidev, 0x40, &rnen);
+	rnen &= ~BIT(7);	/* RNG off */
+	pci_write_config_byte(priv->pcidev, 0x40, rnen);
 }
 }
 
 
-
 static struct hwrng amd_rng = {
 static struct hwrng amd_rng = {
 	.name		= "amd",
 	.name		= "amd",
 	.init		= amd_rng_init,
 	.init		= amd_rng_init,
 	.cleanup	= amd_rng_cleanup,
 	.cleanup	= amd_rng_cleanup,
-	.data_present	= amd_rng_data_present,
-	.data_read	= amd_rng_data_read,
+	.read		= amd_rng_read,
 };
 };
 
 
-
 static int __init mod_init(void)
 static int __init mod_init(void)
 {
 {
 	int err = -ENODEV;
 	int err = -ENODEV;
 	struct pci_dev *pdev = NULL;
 	struct pci_dev *pdev = NULL;
 	const struct pci_device_id *ent;
 	const struct pci_device_id *ent;
 	u32 pmbase;
 	u32 pmbase;
+	struct amd768_priv *priv;
 
 
 	for_each_pci_dev(pdev) {
 	for_each_pci_dev(pdev) {
 		ent = pci_match_id(pci_tbl, pdev);
 		ent = pci_match_id(pci_tbl, pdev);
@@ -123,42 +137,44 @@ static int __init mod_init(void)
 			goto found;
 			goto found;
 	}
 	}
 	/* Device not found. */
 	/* Device not found. */
-	goto out;
+	return -ENODEV;
 
 
 found:
 found:
 	err = pci_read_config_dword(pdev, 0x58, &pmbase);
 	err = pci_read_config_dword(pdev, 0x58, &pmbase);
 	if (err)
 	if (err)
-		goto out;
-	err = -EIO;
+		return err;
+
 	pmbase &= 0x0000FF00;
 	pmbase &= 0x0000FF00;
 	if (pmbase == 0)
 	if (pmbase == 0)
-		goto out;
-	if (!request_region(pmbase + 0xF0, 8, "AMD HWRNG")) {
-		dev_err(&pdev->dev, "AMD HWRNG region 0x%x already in use!\n",
+		return -EIO;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET,
+				PMBASE_SIZE, DRV_NAME)) {
+		dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
 			pmbase + 0xF0);
 			pmbase + 0xF0);
-		err = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
 	}
-	amd_rng.priv = (unsigned long)pmbase;
-	amd_pdev = pdev;
-
-	pr_info("AMD768 RNG detected\n");
-	err = hwrng_register(&amd_rng);
-	if (err) {
-		pr_err(PFX "RNG registering failed (%d)\n",
-		       err);
-		release_region(pmbase + 0xF0, 8);
-		goto out;
+
+	priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET,
+			PMBASE_SIZE);
+	if (!priv->iobase) {
+		pr_err(DRV_NAME "Cannot map ioport\n");
+		return -ENOMEM;
 	}
 	}
-out:
-	return err;
+
+	amd_rng.priv = (unsigned long)priv;
+	priv->pcidev = pdev;
+
+	pr_info(DRV_NAME " detected\n");
+	return devm_hwrng_register(&pdev->dev, &amd_rng);
 }
 }
 
 
 static void __exit mod_exit(void)
 static void __exit mod_exit(void)
 {
 {
-	u32 pmbase = (unsigned long)amd_rng.priv;
-	release_region(pmbase + 0xF0, 8);
-	hwrng_unregister(&amd_rng);
 }
 }
 
 
 module_init(mod_init);
 module_init(mod_init);
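
The amd-rng.c rewrite above moves the driver from the legacy data_present()/data_read() pair to the single .read() callback and to devm-managed resources, which is why mod_exit() is now empty. A minimal hedged skeleton of that modern pattern (the my_* names are illustrative, not from this patch):

#include <linux/hw_random.h>
#include <linux/module.h>
#include <linux/platform_device.h>

static int my_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	/*
	 * Stub: a real driver copies up to @max bytes of entropy into @buf
	 * and returns the number of bytes written, or 0 if @wait is false
	 * and no data is ready yet.
	 */
	return 0;
}

static struct hwrng my_rng = {
	.name = "my-rng",
	.read = my_rng_read,
};

static int my_rng_probe(struct platform_device *pdev)
{
	/* devm registration: nothing to undo in a remove callback. */
	return devm_hwrng_register(&pdev->dev, &my_rng);
}

static struct platform_driver my_rng_driver = {
	.probe	= my_rng_probe,
	.driver	= { .name = "my-rng" },
};
module_platform_driver(my_rng_driver);

MODULE_LICENSE("GPL");
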

+ 3 - 2
drivers/char/hw_random/bcm2835-rng.c

@@ -92,9 +92,10 @@ static int bcm2835_rng_probe(struct platform_device *pdev)
 	bcm2835_rng_ops.priv = (unsigned long)rng_base;
 	bcm2835_rng_ops.priv = (unsigned long)rng_base;
 
 
 	rng_id = of_match_node(bcm2835_rng_of_match, np);
 	rng_id = of_match_node(bcm2835_rng_of_match, np);
-	if (!rng_id)
+	if (!rng_id) {
+		iounmap(rng_base);
 		return -EINVAL;
 		return -EINVAL;
-
+	}
 	/* Check for rng init function, execute it */
 	/* Check for rng init function, execute it */
 	rng_setup = rng_id->data;
 	rng_setup = rng_id->data;
 	if (rng_setup)
 	if (rng_setup)

+ 99 - 0
drivers/char/hw_random/cavium-rng-vf.c

@@ -0,0 +1,99 @@
+/*
+ * Hardware Random Number Generator support for Cavium, Inc.
+ * Thunder processor family.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+struct cavium_rng {
+	struct hwrng ops;
+	void __iomem *result;
+};
+
+/* Read data from the RNG unit */
+static int cavium_rng_read(struct hwrng *rng, void *dat, size_t max, bool wait)
+{
+	struct cavium_rng *p = container_of(rng, struct cavium_rng, ops);
+	unsigned int size = max;
+
+	while (size >= 8) {
+		*((u64 *)dat) = readq(p->result);
+		size -= 8;
+		dat += 8;
+	}
+	while (size > 0) {
+		*((u8 *)dat) = readb(p->result);
+		size--;
+		dat++;
+	}
+	return max;
+}
+
+/* Map Cavium RNG to an HWRNG object */
+static int cavium_rng_probe_vf(struct	pci_dev		*pdev,
+			 const struct	pci_device_id	*id)
+{
+	struct	cavium_rng *rng;
+	int	ret;
+
+	rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
+	if (!rng)
+		return -ENOMEM;
+
+	/* Map the RNG result */
+	rng->result = pcim_iomap(pdev, 0, 0);
+	if (!rng->result) {
+		dev_err(&pdev->dev, "Error iomap failed retrieving result.\n");
+		return -ENOMEM;
+	}
+
+	rng->ops.name    = "cavium rng";
+	rng->ops.read    = cavium_rng_read;
+	rng->ops.quality = 1000;
+
+	pci_set_drvdata(pdev, rng);
+
+	ret = hwrng_register(&rng->ops);
+	if (ret) {
+		dev_err(&pdev->dev, "Error registering device as HWRNG.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Remove the VF */
+void  cavium_rng_remove_vf(struct pci_dev *pdev)
+{
+	struct cavium_rng *rng;
+
+	rng = pci_get_drvdata(pdev);
+	hwrng_unregister(&rng->ops);
+}
+
+static const struct pci_device_id cavium_rng_vf_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa033), 0, 0, 0},
+	{0,},
+};
+MODULE_DEVICE_TABLE(pci, cavium_rng_vf_id_table);
+
+static struct pci_driver cavium_rng_vf_driver = {
+	.name		= "cavium_rng_vf",
+	.id_table	= cavium_rng_vf_id_table,
+	.probe		= cavium_rng_probe_vf,
+	.remove		= cavium_rng_remove_vf,
+};
+module_pci_driver(cavium_rng_vf_driver);
+
+MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+MODULE_LICENSE("GPL");

+ 94 - 0
drivers/char/hw_random/cavium-rng.c

@@ -0,0 +1,94 @@
+/*
+ * Hardware Random Number Generator support for Cavium Inc.
+ * Thunder processor family.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ */
+
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+
+#define THUNDERX_RNM_ENT_EN     0x1
+#define THUNDERX_RNM_RNG_EN     0x2
+
+struct cavium_rng_pf {
+	void __iomem *control_status;
+};
+
+/* Enable the RNG hardware and activate the VF */
+static int cavium_rng_probe(struct pci_dev *pdev,
+			const struct pci_device_id *id)
+{
+	struct	cavium_rng_pf *rng;
+	int	iov_err;
+
+	rng = devm_kzalloc(&pdev->dev, sizeof(*rng), GFP_KERNEL);
+	if (!rng)
+		return -ENOMEM;
+
+	/*Map the RNG control */
+	rng->control_status = pcim_iomap(pdev, 0, 0);
+	if (!rng->control_status) {
+		dev_err(&pdev->dev,
+			"Error iomap failed retrieving control_status.\n");
+		return -ENOMEM;
+	}
+
+	/* Enable the RNG hardware and entropy source */
+	writeq(THUNDERX_RNM_RNG_EN | THUNDERX_RNM_ENT_EN,
+		rng->control_status);
+
+	pci_set_drvdata(pdev, rng);
+
+	/* Enable the Cavium RNG as a VF */
+	iov_err = pci_enable_sriov(pdev, 1);
+	if (iov_err != 0) {
+		/* Disable the RNG hardware and entropy source */
+		writeq(0, rng->control_status);
+		dev_err(&pdev->dev,
+			"Error initializing RNG virtual function,(%i).\n",
+			iov_err);
+		return iov_err;
+	}
+
+	return 0;
+}
+
+/* Disable VF and RNG Hardware */
+static void cavium_rng_remove(struct pci_dev *pdev)
+{
+	struct cavium_rng_pf *rng;
+
+	rng = pci_get_drvdata(pdev);
+
+	/* Remove the VF */
+	pci_disable_sriov(pdev);
+
+	/* Disable the RNG hardware and entropy source */
+	writeq(0, rng->control_status);
+}
+
+static const struct pci_device_id cavium_rng_pf_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa018), 0, 0, 0}, /* Thunder RNM */
+	{0,},
+};
+
+MODULE_DEVICE_TABLE(pci, cavium_rng_pf_id_table);
+
+static struct pci_driver cavium_rng_pf_driver = {
+	.name		= "cavium_rng_pf",
+	.id_table	= cavium_rng_pf_id_table,
+	.probe		= cavium_rng_probe,
+	.remove		= cavium_rng_remove,
+};
+
+module_pci_driver(cavium_rng_pf_driver);
+MODULE_AUTHOR("Omer Khaliq <okhaliq@caviumnetworks.com>");
+MODULE_LICENSE("GPL");

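Also for context: the PF driver only enables the entropy source and spawns the VF; the VF above is what the hw_random core actually selects. A rough way to confirm which backend is active is to read the core's rng_current sysfs attribute (a sketch, assuming the usual /sys/class/misc/hw_random path):

/* Illustrative only: print the currently selected hwrng backend. */
#include <stdio.h>

int main(void)
{
	char name[64] = "";
	FILE *f = fopen("/sys/class/misc/hw_random/rng_current", "r");

	if (!f) {
		perror("rng_current");
		return 1;
	}
	if (fgets(name, sizeof(name), f))
		printf("current hwrng: %s", name);
	fclose(f);
	return 0;
}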
+ 20 - 17
drivers/char/hw_random/core.c

@@ -449,22 +449,6 @@ int hwrng_register(struct hwrng *rng)
 		goto out;
 
 	mutex_lock(&rng_mutex);
-
-	/* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
-	err = -ENOMEM;
-	if (!rng_buffer) {
-		rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
-		if (!rng_buffer)
-			goto out_unlock;
-	}
-	if (!rng_fillbuf) {
-		rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL);
-		if (!rng_fillbuf) {
-			kfree(rng_buffer);
-			goto out_unlock;
-		}
-	}
-
 	/* Must not register two RNGs with the same name. */
 	err = -EEXIST;
 	list_for_each_entry(tmp, &rng_list, list) {
@@ -573,7 +557,26 @@ EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
 
 static int __init hwrng_modinit(void)
 {
-	return register_miscdev();
+	int ret = -ENOMEM;
+
+	/* kmalloc makes this safe for virt_to_page() in virtio_rng.c */
+	rng_buffer = kmalloc(rng_buffer_size(), GFP_KERNEL);
+	if (!rng_buffer)
+		return -ENOMEM;
+
+	rng_fillbuf = kmalloc(rng_buffer_size(), GFP_KERNEL);
+	if (!rng_fillbuf) {
+		kfree(rng_buffer);
+		return -ENOMEM;
+	}
+
+	ret = register_miscdev();
+	if (ret) {
+		kfree(rng_fillbuf);
+		kfree(rng_buffer);
+	}
+
+	return ret;
 }
 
 static void __exit hwrng_modexit(void)

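The core.c change moves the rng_buffer/rng_fillbuf allocations from hwrng_register() into module init and unwinds them in reverse order if registration fails. A minimal sketch of that acquire-in-order, release-in-reverse idiom (all names here are placeholders, not the hw_random symbols):

/* Illustrative module-init unwind pattern; register_demo_device() is a
 * placeholder for whatever final step can fail. */
static void *buf_a, *buf_b;

static int __init demo_modinit(void)
{
	int ret;

	buf_a = kmalloc(256, GFP_KERNEL);
	if (!buf_a)
		return -ENOMEM;

	buf_b = kmalloc(256, GFP_KERNEL);
	if (!buf_b) {
		ret = -ENOMEM;
		goto err_free_a;
	}

	ret = register_demo_device();
	if (ret)
		goto err_free_b;

	return 0;

err_free_b:
	kfree(buf_b);
err_free_a:
	kfree(buf_a);
	return ret;
}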
+ 19 - 39
drivers/char/hw_random/geode-rng.c

@@ -24,15 +24,12 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/hw_random.h>
-#include <linux/delay.h>
-#include <asm/io.h>
-
-
-#define PFX	KBUILD_MODNAME ": "
 
 #define GEODE_RNG_DATA_REG   0x50
 #define GEODE_RNG_STATUS_REG 0x54
@@ -85,7 +82,6 @@ static struct hwrng geode_rng = {
 
 static int __init mod_init(void)
 {
-	int err = -ENODEV;
 	struct pci_dev *pdev = NULL;
 	const struct pci_device_id *ent;
 	void __iomem *mem;
@@ -93,43 +89,27 @@ static int __init mod_init(void)
 
 	for_each_pci_dev(pdev) {
 		ent = pci_match_id(pci_tbl, pdev);
-		if (ent)
-			goto found;
-	}
-	/* Device not found. */
-	goto out;
-
-found:
-	rng_base = pci_resource_start(pdev, 0);
-	if (rng_base == 0)
-		goto out;
-	err = -ENOMEM;
-	mem = ioremap(rng_base, 0x58);
-	if (!mem)
-		goto out;
-	geode_rng.priv = (unsigned long)mem;
-
-	pr_info("AMD Geode RNG detected\n");
-	err = hwrng_register(&geode_rng);
-	if (err) {
-		pr_err(PFX "RNG registering failed (%d)\n",
-		       err);
-		goto err_unmap;
+		if (ent) {
+			rng_base = pci_resource_start(pdev, 0);
+			if (rng_base == 0)
+				return -ENODEV;
+
+			mem = devm_ioremap(&pdev->dev, rng_base, 0x58);
+			if (!mem)
+				return -ENOMEM;
+			geode_rng.priv = (unsigned long)mem;
+
+			pr_info("AMD Geode RNG detected\n");
+			return devm_hwrng_register(&pdev->dev, &geode_rng);
+		}
 	}
-out:
-	return err;
 
-err_unmap:
-	iounmap(mem);
-	goto out;
+	/* Device not found. */
+	return -ENODEV;
 }
 
 static void __exit mod_exit(void)
 {
-	void __iomem *mem = (void __iomem *)geode_rng.priv;
-
-	hwrng_unregister(&geode_rng);
-	iounmap(mem);
 }
 
 module_init(mod_init);

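The geode conversion (like pasemi and tx4939 below) leans on device-managed resources, so the explicit iounmap()/hwrng_unregister() teardown disappears. A hedged sketch of the same pattern in a generic platform driver probe (demo_rng is a placeholder struct hwrng, not the geode code):

/* Illustrative devm-managed probe: mapping and registration are released
 * automatically when the device is unbound. */
static int demo_rng_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	demo_rng.priv = (unsigned long)base;

	return devm_hwrng_register(&pdev->dev, &demo_rng);
}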
+ 0 - 3
drivers/char/hw_random/meson-rng.c

@@ -76,9 +76,6 @@ static int meson_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
 	struct meson_rng_data *data =
 			container_of(rng, struct meson_rng_data, rng);
 
-	if (max < sizeof(u32))
-		return 0;
-
 	*(u32 *)buf = readl_relaxed(data->base + RNG_DATA);
 
 	return sizeof(u32);

+ 2 - 2
drivers/char/hw_random/omap-rng.c

@@ -385,7 +385,7 @@ static int omap_rng_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
-	if (ret) {
+	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
 		dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
 		pm_runtime_put_noidle(&pdev->dev);
 		pm_runtime_put_noidle(&pdev->dev);
 		goto err_ioremap;
 		goto err_ioremap;
@@ -443,7 +443,7 @@ static int __maybe_unused omap_rng_resume(struct device *dev)
 	int ret;
 
 	ret = pm_runtime_get_sync(dev);
-	if (ret) {
+	if (ret < 0) {
 		dev_err(dev, "Failed to runtime_get device: %d\n", ret);
 		dev_err(dev, "Failed to runtime_get device: %d\n", ret);
 		pm_runtime_put_noidle(dev);
 		pm_runtime_put_noidle(dev);
 		return ret;
 		return ret;

+ 2 - 8
drivers/char/hw_random/omap3-rom-rng.c

@@ -71,12 +71,7 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count)
 	return 0;
 }
 
-static int omap3_rom_rng_data_present(struct hwrng *rng, int wait)
-{
-	return 1;
-}
-
-static int omap3_rom_rng_data_read(struct hwrng *rng, u32 *data)
+static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w)
 {
 	int r;
 
@@ -88,8 +83,7 @@ static int omap3_rom_rng_data_read(struct hwrng *rng, u32 *data)
 
 static struct hwrng omap3_rom_rng_ops = {
 	.name		= "omap3-rom",
-	.data_present	= omap3_rom_rng_data_present,
-	.data_read	= omap3_rom_rng_data_read,
+	.read		= omap3_rom_rng_read,
 };
 
 static int omap3_rom_rng_probe(struct platform_device *pdev)

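The old data_present()/data_read() pair is replaced here by the single .read() hook, which fills the caller's buffer and returns the number of bytes written (possibly fewer than max). A hedged sketch of that contract for a device with a 32-bit data register (demo_base and DEMO_RNG_DATA are placeholders, not the OMAP ROM code):

/* Illustrative hwrng .read callback. */
static int demo_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	u32 sample;

	if (max < sizeof(sample))	/* defensive; return what fits */
		return 0;

	sample = readl(demo_base + DEMO_RNG_DATA);
	memcpy(data, &sample, sizeof(sample));
	return sizeof(sample);		/* bytes placed in the buffer */
}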
+ 7 - 30
drivers/char/hw_random/pasemi-rng.c

@@ -95,42 +95,20 @@ static struct hwrng pasemi_rng = {
 	.data_read	= pasemi_rng_data_read,
 };
 
-static int rng_probe(struct platform_device *ofdev)
+static int rng_probe(struct platform_device *pdev)
 {
 	void __iomem *rng_regs;
-	struct device_node *rng_np = ofdev->dev.of_node;
-	struct resource res;
-	int err = 0;
+	struct resource *res;
 
-	err = of_address_to_resource(rng_np, 0, &res);
-	if (err)
-		return -ENODEV;
-
-	rng_regs = ioremap(res.start, 0x100);
-
-	if (!rng_regs)
-		return -ENOMEM;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rng_regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(rng_regs))
+		return PTR_ERR(rng_regs);
 
 	pasemi_rng.priv = (unsigned long)rng_regs;
 
 	pr_info("Registering PA Semi RNG\n");
-
-	err = hwrng_register(&pasemi_rng);
-
-	if (err)
-		iounmap(rng_regs);
-
-	return err;
-}
-
-static int rng_remove(struct platform_device *dev)
-{
-	void __iomem *rng_regs = (void __iomem *)pasemi_rng.priv;
-
-	hwrng_unregister(&pasemi_rng);
-	iounmap(rng_regs);
-
-	return 0;
+	return devm_hwrng_register(&pdev->dev, &pasemi_rng);
 }
 
 static const struct of_device_id rng_match[] = {
@@ -146,7 +124,6 @@ static struct platform_driver rng_driver = {
 		.of_match_table = rng_match,
 	},
 	.probe		= rng_probe,
-	.remove		= rng_remove,
 };
 
 module_platform_driver(rng_driver);

+ 0 - 1
drivers/char/hw_random/pic32-rng.c

@@ -143,7 +143,6 @@ static struct platform_driver pic32_rng_driver = {
 	.remove		= pic32_rng_remove,
 	.driver		= {
 		.name	= "pic32-rng",
-		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(pic32_rng_of_match),
 	},
 };

+ 1 - 3
drivers/char/hw_random/st-rng.c

@@ -54,9 +54,6 @@ static int st_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 	u32 status;
 	int i;
 
-	if (max < sizeof(u16))
-		return -EINVAL;
-
 	/* Wait until FIFO is full - max 4uS*/
 	for (i = 0; i < ST_RNG_FILL_FIFO_TIMEOUT; i++) {
 		status = readl_relaxed(ddata->base + ST_RNG_STATUS_REG);
@@ -111,6 +108,7 @@ static int st_rng_probe(struct platform_device *pdev)
 	ret = hwrng_register(&ddata->ops);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to register HW RNG\n");
+		clk_disable_unprepare(clk);
 		return ret;
 	}
 

+ 1 - 10
drivers/char/hw_random/tx4939-rng.c

@@ -144,22 +144,13 @@ static int __init tx4939_rng_probe(struct platform_device *dev)
 	}
 
 	platform_set_drvdata(dev, rngdev);
-	return hwrng_register(&rngdev->rng);
-}
-
-static int __exit tx4939_rng_remove(struct platform_device *dev)
-{
-	struct tx4939_rng *rngdev = platform_get_drvdata(dev);
-
-	hwrng_unregister(&rngdev->rng);
-	return 0;
+	return devm_hwrng_register(&dev->dev, &rngdev->rng);
 }
 
 static struct platform_driver tx4939_rng_driver = {
 	.driver		= {
 		.name	= "tx4939-rng",
 	},
-	.remove = tx4939_rng_remove,
 };
 
 module_platform_driver_probe(tx4939_rng_driver, tx4939_rng_probe);

+ 3 - 0
drivers/crypto/Kconfig

@@ -318,6 +318,9 @@ config CRYPTO_DEV_OMAP_AES
 	select CRYPTO_AES
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_ENGINE
+	select CRYPTO_CBC
+	select CRYPTO_ECB
+	select CRYPTO_CTR
 	help
 	  OMAP processors have AES module accelerator. Select this if you
 	  want to use the OMAP module for AES algorithms.

+ 101 - 60
drivers/crypto/caam/caamalg.c

@@ -111,6 +111,42 @@
 #else
 #define debug(format, arg...)
 #endif
+
+#ifdef DEBUG
+#include <linux/highmem.h>
+
+static void dbg_dump_sg(const char *level, const char *prefix_str,
+			int prefix_type, int rowsize, int groupsize,
+			struct scatterlist *sg, size_t tlen, bool ascii,
+			bool may_sleep)
+{
+	struct scatterlist *it;
+	void *it_page;
+	size_t len;
+	void *buf;
+
+	for (it = sg; it != NULL && tlen > 0; it = sg_next(it)) {
+		/*
+		 * make sure the scatterlist's page
+		 * has a valid virtual memory mapping
+		 */
+		it_page = kmap_atomic(sg_page(it));
+		if (unlikely(!it_page)) {
+			printk(KERN_ERR "dbg_dump_sg: kmap failed\n");
+			return;
+		}
+
+		buf = it_page + it->offset;
+		len = min(tlen, it->length);
+		print_hex_dump(level, prefix_str, prefix_type, rowsize,
+			       groupsize, buf, len, ascii);
+		tlen -= len;
+
+		kunmap_atomic(it_page);
+	}
+}
+#endif
+
 static struct list_head alg_list;
 
 struct caam_alg_entry {
@@ -227,8 +263,9 @@ static void append_key_aead(u32 *desc, struct caam_ctx *ctx,
 	if (is_rfc3686) {
 		nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len +
 			       enckeylen);
-		append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-				    LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+				   LDST_CLASS_IND_CCB |
+				   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
 		append_move(desc,
 			    MOVE_SRC_OUTFIFO |
 			    MOVE_DEST_CLASS1CTX |
@@ -500,11 +537,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
-		append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-				    LDST_CLASS_1_CCB |
-				    LDST_SRCDST_BYTE_CONTEXT |
-				    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				     LDST_OFFSET_SHIFT));
+		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+				     LDST_SRCDST_BYTE_CONTEXT |
+				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+				      LDST_OFFSET_SHIFT));
 
 	/* Class 1 operation */
 	append_operation(desc, ctx->class1_alg_type |
@@ -578,11 +614,10 @@ skip_enc:
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
-		append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-				    LDST_CLASS_1_CCB |
-				    LDST_SRCDST_BYTE_CONTEXT |
-				    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				     LDST_OFFSET_SHIFT));
+		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+				     LDST_SRCDST_BYTE_CONTEXT |
+				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+				      LDST_OFFSET_SHIFT));
 
 	/* Choose operation */
 	if (ctr_mode)
@@ -683,11 +718,10 @@ copy_iv:
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
-		append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-				    LDST_CLASS_1_CCB |
-				    LDST_SRCDST_BYTE_CONTEXT |
-				    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				     LDST_OFFSET_SHIFT));
+		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+				     LDST_SRCDST_BYTE_CONTEXT |
+				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+				      LDST_OFFSET_SHIFT));
 
 	/* Class 1 operation */
 	append_operation(desc, ctx->class1_alg_type |
@@ -1478,7 +1512,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	int ret = 0;
 	u32 *key_jump_cmd;
 	u32 *desc;
-	u32 *nonce;
+	u8 *nonce;
 	u32 geniv;
 	u32 ctx1_iv_off = 0;
 	const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) ==
@@ -1531,9 +1565,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* Load nonce into CONTEXT1 reg */
 	if (is_rfc3686) {
-		nonce = (u32 *)(key + keylen);
-		append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-				    LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+		nonce = (u8 *)key + keylen;
+		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+				   LDST_CLASS_IND_CCB |
+				   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
 		append_move(desc, MOVE_WAITCOMP |
 			    MOVE_SRC_OUTFIFO |
 			    MOVE_DEST_CLASS1CTX |
@@ -1549,11 +1584,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* Load counter into CONTEXT1 reg */
 	if (is_rfc3686)
-		append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-				    LDST_CLASS_1_CCB |
-				    LDST_SRCDST_BYTE_CONTEXT |
-				    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				     LDST_OFFSET_SHIFT));
+		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+				     LDST_SRCDST_BYTE_CONTEXT |
+				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+				      LDST_OFFSET_SHIFT));
 
 	/* Load operation */
 	append_operation(desc, ctx->class1_alg_type |
@@ -1590,9 +1624,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* Load nonce into CONTEXT1 reg */
 	if (is_rfc3686) {
-		nonce = (u32 *)(key + keylen);
-		append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-				    LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+		nonce = (u8 *)key + keylen;
+		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+				   LDST_CLASS_IND_CCB |
+				   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
 		append_move(desc, MOVE_WAITCOMP |
 			    MOVE_SRC_OUTFIFO |
 			    MOVE_DEST_CLASS1CTX |
@@ -1608,11 +1643,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* Load counter into CONTEXT1 reg */
 	if (is_rfc3686)
-		append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
-				    LDST_CLASS_1_CCB |
-				    LDST_SRCDST_BYTE_CONTEXT |
-				    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				     LDST_OFFSET_SHIFT));
+		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+				     LDST_SRCDST_BYTE_CONTEXT |
+				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+				      LDST_OFFSET_SHIFT));
 
 	/* Choose operation */
 	if (ctr_mode)
@@ -1653,9 +1687,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* Load Nonce into CONTEXT1 reg */
 	if (is_rfc3686) {
-		nonce = (u32 *)(key + keylen);
-		append_load_imm_u32(desc, *nonce, LDST_CLASS_IND_CCB |
-				    LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+		nonce = (u8 *)key + keylen;
+		append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+				   LDST_CLASS_IND_CCB |
+				   LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
 		append_move(desc, MOVE_WAITCOMP |
 			    MOVE_SRC_OUTFIFO |
 			    MOVE_DEST_CLASS1CTX |
@@ -1685,11 +1720,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
-		append_load_imm_u32(desc, (u32)1, LDST_IMM |
-				    LDST_CLASS_1_CCB |
-				    LDST_SRCDST_BYTE_CONTEXT |
-				    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-				     LDST_OFFSET_SHIFT));
+		append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+				     LDST_SRCDST_BYTE_CONTEXT |
+				     ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+				      LDST_OFFSET_SHIFT));
 
 	if (ctx1_iv_off)
 		append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP |
@@ -1995,9 +2029,9 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
 		       edesc->src_nents > 1 ? 100 : ivsize, 1);
-	print_hex_dump(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-		       edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
+		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
 #endif
 
 	ablkcipher_unmap(jrdev, edesc, req);
@@ -2027,9 +2061,9 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 	print_hex_dump(KERN_ERR, "dstiv  @"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
 		       ivsize, 1);
-	print_hex_dump(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-		       edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
+	dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
+		    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
+		    edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
 #endif
 
 	ablkcipher_unmap(jrdev, edesc, req);
@@ -2184,12 +2218,15 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 	int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
+	bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+					      CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
 	print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
 	print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
 		       ivsize, 1);
-	print_hex_dump(KERN_ERR, "src    @"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-		       edesc->src_nents ? 100 : req->nbytes, 1);
+	printk(KERN_ERR "asked=%d, nbytes=%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes);
+	dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
+		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		    edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
 #endif
 
 	len = desc_len(sh_desc);
@@ -2241,12 +2278,14 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
 	int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
+	bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+					      CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
 	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
 	print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
 		       ivsize, 1);
 		       ivsize, 1);
-	print_hex_dump(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-		       edesc->src_nents ? 100 : req->nbytes, 1);
+	dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
+		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		    edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
 #endif
 
 	len = desc_len(sh_desc);
@@ -2516,18 +2555,20 @@ static int aead_decrypt(struct aead_request *req)
 	u32 *desc;
 	int ret = 0;
 
+#ifdef DEBUG
+	bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
+					      CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
+	dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
+		    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
+		    req->assoclen + req->cryptlen, 1, may_sleep);
+#endif
+
 	/* allocate extended descriptor */
 	edesc = aead_edesc_alloc(req, AUTHENC_DESC_JOB_IO_LEN,
 				 &all_contig, false);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, sg_virt(req->src),
-		       req->assoclen + req->cryptlen, 1);
-#endif
-
 	/* Create and submit job descriptor*/
 	init_authenc_job(req, edesc, all_contig, false);
 #ifdef DEBUG

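A recurring caamalg.c change above is that the RFC3686 nonce is no longer read by casting the key tail to u32 * (an offset inside the key blob need not be 4-byte aligned); it is handed to the descriptor as a byte-sized immediate instead. A generic sketch of the safe way to pull a 32-bit value out of an arbitrarily aligned byte stream, independent of the CAAM descriptor API:

#include <string.h>

/* Illustrative only: copy instead of casting to avoid an unaligned access. */
static inline unsigned int load_u32_from_bytes(const unsigned char *p)
{
	unsigned int v;

	memcpy(&v, p, sizeof(v));
	return v;
}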
+ 313 - 268
drivers/crypto/caam/caamhash.c

@@ -99,17 +99,17 @@ static struct list_head hash_list;
 
 /* ahash per-session context */
 struct caam_hash_ctx {
-	struct device *jrdev;
-	u32 sh_desc_update[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN];
-	u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN];
-	dma_addr_t sh_desc_update_dma;
+	u32 sh_desc_update[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+	u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+	u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+	u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+	u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
+	dma_addr_t sh_desc_update_dma ____cacheline_aligned;
 	dma_addr_t sh_desc_update_first_dma;
 	dma_addr_t sh_desc_fin_dma;
 	dma_addr_t sh_desc_digest_dma;
 	dma_addr_t sh_desc_finup_dma;
+	struct device *jrdev;
 	u32 alg_type;
 	u32 alg_op;
 	u8 key[CAAM_MAX_HASH_KEY_SIZE];
@@ -187,15 +187,6 @@ static inline dma_addr_t buf_map_to_sec4_sg(struct device *jrdev,
 	return buf_dma;
 }
 
-/* Map req->src and put it in link table */
-static inline void src_map_to_sec4_sg(struct device *jrdev,
-				      struct scatterlist *src, int src_nents,
-				      struct sec4_sg_entry *sec4_sg)
-{
-	dma_map_sg(jrdev, src, src_nents, DMA_TO_DEVICE);
-	sg_to_sec4_sg_last(src, src_nents, sec4_sg, 0);
-}
-
 /*
  * Only put buffer in link table if it contains data, which is possible,
  * since a buffer has previously been used, and needs to be unmapped,
@@ -449,7 +440,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
 	u32 *desc;
 	struct split_key_result result;
 	dma_addr_t src_dma, dst_dma;
-	int ret = 0;
+	int ret;
 
 	desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
 	if (!desc) {
@@ -526,7 +517,7 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 	struct device *jrdev = ctx->jrdev;
 	int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
 	int digestsize = crypto_ahash_digestsize(ahash);
-	int ret = 0;
+	int ret;
 	u8 *hashed_key = NULL;
 
 #ifdef DEBUG
@@ -534,14 +525,15 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 #endif
 
 	if (keylen > blocksize) {
-		hashed_key = kmalloc(sizeof(u8) * digestsize, GFP_KERNEL |
-				     GFP_DMA);
+		hashed_key = kmalloc_array(digestsize,
+					   sizeof(*hashed_key),
+					   GFP_KERNEL | GFP_DMA);
 		if (!hashed_key)
 			return -ENOMEM;
 		ret = hash_digest_key(ctx, key, &keylen, hashed_key,
 				      digestsize);
 		if (ret)
-			goto badkey;
+			goto bad_free_key;
 		key = hashed_key;
 	}
 
@@ -559,14 +551,14 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 
 	ret = gen_split_hash_key(ctx, key, keylen);
 	if (ret)
-		goto badkey;
+		goto bad_free_key;
 
 	ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
 				      DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, ctx->key_dma)) {
 		dev_err(jrdev, "unable to map key i/o memory\n");
 		ret = -ENOMEM;
-		goto map_err;
+		goto error_free_key;
 	}
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
@@ -579,11 +571,10 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 		dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len,
 				 DMA_TO_DEVICE);
 	}
-
-map_err:
+ error_free_key:
 	kfree(hashed_key);
 	return ret;
-badkey:
+ bad_free_key:
 	kfree(hashed_key);
 	crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
@@ -595,16 +586,16 @@ badkey:
  * @sec4_sg_dma: physical mapped address of h/w link table
  * @src_nents: number of segments in input scatterlist
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
- * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ * @sec4_sg: h/w link table
  */
 struct ahash_edesc {
 	dma_addr_t dst_dma;
 	dma_addr_t sec4_sg_dma;
 	int src_nents;
 	int sec4_sg_bytes;
-	struct sec4_sg_entry *sec4_sg;
-	u32 hw_desc[0];
+	u32 hw_desc[DESC_JOB_IO_LEN / sizeof(u32)] ____cacheline_aligned;
+	struct sec4_sg_entry sec4_sg[0];
 };
 
 static inline void ahash_unmap(struct device *dev,
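struct ahash_edesc now embeds the job descriptor inline and ends with a zero-length sec4_sg[] array, so a single allocation covers the descriptor plus its link table. A rough sketch of that layout/allocation pattern (demo_* types and sizes are placeholders, not the CAAM definitions):

/* Illustrative trailing-array allocation. */
struct demo_sg_entry {
	u64 addr;
	u32 len;
	u32 flags;
};

struct demo_edesc {
	int src_nents;
	u32 hw_desc[16] ____cacheline_aligned;
	struct demo_sg_entry sec4_sg[0];	/* link table follows in the same allocation */
};

static struct demo_edesc *demo_edesc_alloc(int sg_num, gfp_t flags)
{
	return kzalloc(sizeof(struct demo_edesc) +
		       sg_num * sizeof(struct demo_sg_entry), flags);
}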
@@ -774,6 +765,65 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
 	req->base.complete(&req->base, err);
 }
 
+/*
+ * Allocate an enhanced descriptor, which contains the hardware descriptor
+ * and space for hardware scatter table containing sg_num entries.
+ */
+static struct ahash_edesc *ahash_edesc_alloc(struct caam_hash_ctx *ctx,
+					     int sg_num, u32 *sh_desc,
+					     dma_addr_t sh_desc_dma,
+					     gfp_t flags)
+{
+	struct ahash_edesc *edesc;
+	unsigned int sg_size = sg_num * sizeof(struct sec4_sg_entry);
+
+	edesc = kzalloc(sizeof(*edesc) + sg_size, GFP_DMA | flags);
+	if (!edesc) {
+		dev_err(ctx->jrdev, "could not allocate extended descriptor\n");
+		return NULL;
+	}
+
+	init_job_desc_shared(edesc->hw_desc, sh_desc_dma, desc_len(sh_desc),
+			     HDR_SHARE_DEFER | HDR_REVERSE);
+
+	return edesc;
+}
+
+static int ahash_edesc_add_src(struct caam_hash_ctx *ctx,
+			       struct ahash_edesc *edesc,
+			       struct ahash_request *req, int nents,
+			       unsigned int first_sg,
+			       unsigned int first_bytes, size_t to_hash)
+{
+	dma_addr_t src_dma;
+	u32 options;
+
+	if (nents > 1 || first_sg) {
+		struct sec4_sg_entry *sg = edesc->sec4_sg;
+		unsigned int sgsize = sizeof(*sg) * (first_sg + nents);
+
+		sg_to_sec4_sg_last(req->src, nents, sg + first_sg, 0);
+
+		src_dma = dma_map_single(ctx->jrdev, sg, sgsize, DMA_TO_DEVICE);
+		if (dma_mapping_error(ctx->jrdev, src_dma)) {
+			dev_err(ctx->jrdev, "unable to map S/G table\n");
+			return -ENOMEM;
+		}
+
+		edesc->sec4_sg_bytes = sgsize;
+		edesc->sec4_sg_dma = src_dma;
+		options = LDST_SGF;
+	} else {
+		src_dma = sg_dma_address(req->src);
+		options = 0;
+	}
+
+	append_seq_in_ptr(edesc->hw_desc, src_dma, first_bytes + to_hash,
+			  options);
+
+	return 0;
+}
+
 /* submit update job descriptor */
 static int ahash_update_ctx(struct ahash_request *req)
 {
@@ -789,12 +839,10 @@ static int ahash_update_ctx(struct ahash_request *req)
 	int *next_buflen = state->current_buf ? &state->buflen_0 :
 			   &state->buflen_1, last_buflen;
 	int in_len = *buflen + req->nbytes, to_hash;
-	u32 *sh_desc = ctx->sh_desc_update, *desc;
-	dma_addr_t ptr = ctx->sh_desc_update_dma;
-	int src_nents, sec4_sg_bytes, sec4_sg_src_index;
+	u32 *desc;
+	int src_nents, mapped_nents, sec4_sg_bytes, sec4_sg_src_index;
 	struct ahash_edesc *edesc;
 	int ret = 0;
-	int sh_len;
 
 	last_buflen = *next_buflen;
 	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
@@ -807,40 +855,51 @@ static int ahash_update_ctx(struct ahash_request *req)
 			dev_err(jrdev, "Invalid number of src SG.\n");
 			dev_err(jrdev, "Invalid number of src SG.\n");
 			return src_nents;
 			return src_nents;
 		}
 		}
+
+		if (src_nents) {
+			mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+						  DMA_TO_DEVICE);
+			if (!mapped_nents) {
+				dev_err(jrdev, "unable to DMA map source\n");
+				return -ENOMEM;
+			}
+		} else {
+			mapped_nents = 0;
+		}
+
 		sec4_sg_src_index = 1 + (*buflen ? 1 : 0);
-		sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+		sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
 				 sizeof(struct sec4_sg_entry);
 
 		/*
 		 * allocate space for base edesc and hw desc commands,
 		 * link tables
 		 */
-		edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-				sec4_sg_bytes, GFP_DMA | flags);
+		edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+					  ctx->sh_desc_update,
+					  ctx->sh_desc_update_dma, flags);
 		if (!edesc) {
-			dev_err(jrdev,
-				"could not allocate extended descriptor\n");
+			dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
 			return -ENOMEM;
 		}
 
 		edesc->src_nents = src_nents;
 		edesc->sec4_sg_bytes = sec4_sg_bytes;
-		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-				 DESC_JOB_IO_LEN;
 
 		ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
 					 edesc->sec4_sg, DMA_BIDIRECTIONAL);
 		if (ret)
-			return ret;
+			goto unmap_ctx;
 
 		state->buf_dma = try_buf_map_to_sec4_sg(jrdev,
 							edesc->sec4_sg + 1,
 							buf, state->buf_dma,
 							*buflen, last_buflen);
 
-		if (src_nents) {
-			src_map_to_sec4_sg(jrdev, req->src, src_nents,
-					   edesc->sec4_sg + sec4_sg_src_index);
+		if (mapped_nents) {
+			sg_to_sec4_sg_last(req->src, mapped_nents,
+					   edesc->sec4_sg + sec4_sg_src_index,
+					   0);
 			if (*next_buflen)
 				scatterwalk_map_and_copy(next_buf, req->src,
 							 to_hash - *buflen,
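The hunks above introduce mapped_nents because dma_map_sg() may coalesce entries: the value it returns sizes the hardware link table, while the original sg_nents count is what must be passed back to dma_unmap_sg(). A condensed, hedged sketch of that rule with generic names (not the caamhash helpers):

/* Illustrative only: 'mapped' sizes the h/w table, 'nents' is unmapped. */
static int demo_map_src(struct device *dev, struct scatterlist *src,
			unsigned int len)
{
	int nents, mapped;

	nents = sg_nents_for_len(src, len);
	if (nents < 0)
		return nents;

	mapped = dma_map_sg(dev, src, nents, DMA_TO_DEVICE);
	if (!mapped)
		return -ENOMEM;

	/* ... build a 'mapped'-entry S/G table and run the job here ... */

	dma_unmap_sg(dev, src, nents, DMA_TO_DEVICE);
	return 0;
}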
@@ -852,17 +911,15 @@ static int ahash_update_ctx(struct ahash_request *req)
 
 		state->current_buf = !state->current_buf;
 
-		sh_len = desc_len(sh_desc);
 		desc = edesc->hw_desc;
-		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-				     HDR_REVERSE);
 
 		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 						     sec4_sg_bytes,
 						     DMA_TO_DEVICE);
 		if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 			dev_err(jrdev, "unable to map S/G table\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto unmap_ctx;
 		}
 
 		append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
@@ -877,13 +934,10 @@ static int ahash_update_ctx(struct ahash_request *req)
 #endif
 
 		ret = caam_jr_enqueue(jrdev, desc, ahash_done_bi, req);
-		if (!ret) {
-			ret = -EINPROGRESS;
-		} else {
-			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-					   DMA_BIDIRECTIONAL);
-			kfree(edesc);
-		}
+		if (ret)
+			goto unmap_ctx;
+
+		ret = -EINPROGRESS;
 	} else if (*next_buflen) {
 		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
 					 req->nbytes, 0);
@@ -898,6 +952,10 @@ static int ahash_update_ctx(struct ahash_request *req)
 		       *next_buflen, 1);
 #endif
 
+	return ret;
+ unmap_ctx:
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
+	kfree(edesc);
 	return ret;
 }
 
@@ -913,38 +971,31 @@ static int ahash_final_ctx(struct ahash_request *req)
 	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
 	int last_buflen = state->current_buf ? state->buflen_0 :
 			  state->buflen_1;
-	u32 *sh_desc = ctx->sh_desc_fin, *desc;
-	dma_addr_t ptr = ctx->sh_desc_fin_dma;
+	u32 *desc;
 	int sec4_sg_bytes, sec4_sg_src_index;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
-	int ret = 0;
-	int sh_len;
+	int ret;
 
 
 	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
 	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
 	sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry);
 	sec4_sg_bytes = sec4_sg_src_index * sizeof(struct sec4_sg_entry);
 
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-			GFP_DMA | flags);
-	if (!edesc) {
-		dev_err(jrdev, "could not allocate extended descriptor\n");
+	edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index,
+				  ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
+				  flags);
+	if (!edesc)
 		return -ENOMEM;
 		return -ENOMEM;
-	}
 
 
-	sh_len = desc_len(sh_desc);
 	desc = edesc->hw_desc;
 	desc = edesc->hw_desc;
-	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
 
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
-	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-			 DESC_JOB_IO_LEN;
 	edesc->src_nents = 0;
 	edesc->src_nents = 0;
 
 
 	ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
 	ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
 				 edesc->sec4_sg, DMA_TO_DEVICE);
 				 edesc->sec4_sg, DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
-		return ret;
+		goto unmap_ctx;
 
 
 	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
 	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
 						buf, state->buf_dma, buflen,
 						buf, state->buf_dma, buflen,
@@ -956,7 +1007,8 @@ static int ahash_final_ctx(struct ahash_request *req)
 					    sec4_sg_bytes, DMA_TO_DEVICE);
 					    sec4_sg_bytes, DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 		dev_err(jrdev, "unable to map S/G table\n");
 		dev_err(jrdev, "unable to map S/G table\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto unmap_ctx;
 	}
 	}
 
 
 	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen,
 	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len + buflen,
@@ -966,7 +1018,8 @@ static int ahash_final_ctx(struct ahash_request *req)
 						digestsize);
 						digestsize);
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 		dev_err(jrdev, "unable to map dst\n");
 		dev_err(jrdev, "unable to map dst\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto unmap_ctx;
 	}
 	}
 
 
 #ifdef DEBUG
 #ifdef DEBUG
@@ -975,13 +1028,13 @@ static int ahash_final_ctx(struct ahash_request *req)
 #endif
 #endif
 
 
 	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
 	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
-	if (!ret) {
-		ret = -EINPROGRESS;
-	} else {
-		ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
-		kfree(edesc);
-	}
+	if (ret)
+		goto unmap_ctx;
 
 
+	return -EINPROGRESS;
+ unmap_ctx:
+	ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+	kfree(edesc);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -997,68 +1050,66 @@ static int ahash_finup_ctx(struct ahash_request *req)
 	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
 	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
 	int last_buflen = state->current_buf ? state->buflen_0 :
 	int last_buflen = state->current_buf ? state->buflen_0 :
 			  state->buflen_1;
 			  state->buflen_1;
-	u32 *sh_desc = ctx->sh_desc_finup, *desc;
-	dma_addr_t ptr = ctx->sh_desc_finup_dma;
-	int sec4_sg_bytes, sec4_sg_src_index;
-	int src_nents;
+	u32 *desc;
+	int sec4_sg_src_index;
+	int src_nents, mapped_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
 	struct ahash_edesc *edesc;
-	int ret = 0;
-	int sh_len;
+	int ret;
 
 
 	src_nents = sg_nents_for_len(req->src, req->nbytes);
 	src_nents = sg_nents_for_len(req->src, req->nbytes);
 	if (src_nents < 0) {
 	if (src_nents < 0) {
 		dev_err(jrdev, "Invalid number of src SG.\n");
 		dev_err(jrdev, "Invalid number of src SG.\n");
 		return src_nents;
 		return src_nents;
 	}
 	}
+
+	if (src_nents) {
+		mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+					  DMA_TO_DEVICE);
+		if (!mapped_nents) {
+			dev_err(jrdev, "unable to DMA map source\n");
+			return -ENOMEM;
+		}
+	} else {
+		mapped_nents = 0;
+	}
+
 	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
 	sec4_sg_src_index = 1 + (buflen ? 1 : 0);
-	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
-			 sizeof(struct sec4_sg_entry);
 
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-			GFP_DMA | flags);
+	edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+				  ctx->sh_desc_finup, ctx->sh_desc_finup_dma,
+				  flags);
 	if (!edesc) {
 	if (!edesc) {
-		dev_err(jrdev, "could not allocate extended descriptor\n");
+		dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
 
 
-	sh_len = desc_len(sh_desc);
 	desc = edesc->hw_desc;
 	desc = edesc->hw_desc;
-	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
 
 	edesc->src_nents = src_nents;
 	edesc->src_nents = src_nents;
-	edesc->sec4_sg_bytes = sec4_sg_bytes;
-	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-			 DESC_JOB_IO_LEN;
 
 
 	ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
 	ret = ctx_map_to_sec4_sg(desc, jrdev, state, ctx->ctx_len,
 				 edesc->sec4_sg, DMA_TO_DEVICE);
 				 edesc->sec4_sg, DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
-		return ret;
+		goto unmap_ctx;
 
 
 	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
 	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1,
 						buf, state->buf_dma, buflen,
 						buf, state->buf_dma, buflen,
 						last_buflen);
 						last_buflen);
 
 
-	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg +
-			   sec4_sg_src_index);
-
-	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-					    sec4_sg_bytes, DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-		dev_err(jrdev, "unable to map S/G table\n");
-		return -ENOMEM;
-	}
-
-	append_seq_in_ptr(desc, edesc->sec4_sg_dma, ctx->ctx_len +
-			       buflen + req->nbytes, LDST_SGF);
+	ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents,
+				  sec4_sg_src_index, ctx->ctx_len + buflen,
+				  req->nbytes);
+	if (ret)
+		goto unmap_ctx;
 
 
 	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
 	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
 						digestsize);
 						digestsize);
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 		dev_err(jrdev, "unable to map dst\n");
 		dev_err(jrdev, "unable to map dst\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto unmap_ctx;
 	}
 	}
 
 
 #ifdef DEBUG
 #ifdef DEBUG
@@ -1067,13 +1118,13 @@ static int ahash_finup_ctx(struct ahash_request *req)
 #endif
 #endif
 
 
 	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
 	ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_src, req);
-	if (!ret) {
-		ret = -EINPROGRESS;
-	} else {
-		ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
-		kfree(edesc);
-	}
+	if (ret)
+		goto unmap_ctx;
 
 
+	return -EINPROGRESS;
+ unmap_ctx:
+	ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
+	kfree(edesc);
 	return ret;
 	return ret;
 }
 }
 
 
@@ -1084,60 +1135,56 @@ static int ahash_digest(struct ahash_request *req)
 	struct device *jrdev = ctx->jrdev;
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
 	gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
 		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
 		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
-	u32 *sh_desc = ctx->sh_desc_digest, *desc;
-	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	u32 *desc;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
-	int src_nents, sec4_sg_bytes;
-	dma_addr_t src_dma;
+	int src_nents, mapped_nents;
 	struct ahash_edesc *edesc;
 	struct ahash_edesc *edesc;
-	int ret = 0;
-	u32 options;
-	int sh_len;
+	int ret;
 
 
-	src_nents = sg_count(req->src, req->nbytes);
+	src_nents = sg_nents_for_len(req->src, req->nbytes);
 	if (src_nents < 0) {
 	if (src_nents < 0) {
 		dev_err(jrdev, "Invalid number of src SG.\n");
 		dev_err(jrdev, "Invalid number of src SG.\n");
 		return src_nents;
 		return src_nents;
 	}
 	}
-	dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
-	sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+	if (src_nents) {
+		mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+					  DMA_TO_DEVICE);
+		if (!mapped_nents) {
+			dev_err(jrdev, "unable to map source for DMA\n");
+			return -ENOMEM;
+		}
+	} else {
+		mapped_nents = 0;
+	}
 
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kzalloc(sizeof(*edesc) + sec4_sg_bytes + DESC_JOB_IO_LEN,
-			GFP_DMA | flags);
+	edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ? mapped_nents : 0,
+				  ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
+				  flags);
 	if (!edesc) {
 	if (!edesc) {
-		dev_err(jrdev, "could not allocate extended descriptor\n");
+		dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
-	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-			  DESC_JOB_IO_LEN;
-	edesc->sec4_sg_bytes = sec4_sg_bytes;
-	edesc->src_nents = src_nents;
 
 
-	sh_len = desc_len(sh_desc);
-	desc = edesc->hw_desc;
-	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
+	edesc->src_nents = src_nents;
 
 
-	if (src_nents) {
-		sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0);
-		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-					    sec4_sg_bytes, DMA_TO_DEVICE);
-		if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-			dev_err(jrdev, "unable to map S/G table\n");
-			return -ENOMEM;
-		}
-		src_dma = edesc->sec4_sg_dma;
-		options = LDST_SGF;
-	} else {
-		src_dma = sg_dma_address(req->src);
-		options = 0;
+	ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0,
+				  req->nbytes);
+	if (ret) {
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
+		return ret;
 	}
 	}
-	append_seq_in_ptr(desc, src_dma, req->nbytes, options);
+
+	desc = edesc->hw_desc;
 
 
 	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
 	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
 						digestsize);
 						digestsize);
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 		dev_err(jrdev, "unable to map dst\n");
 		dev_err(jrdev, "unable to map dst\n");
+		ahash_unmap(jrdev, edesc, req, digestsize);
+		kfree(edesc);
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
 
 
@@ -1168,29 +1215,23 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
 		       CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC;
 	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
 	u8 *buf = state->current_buf ? state->buf_1 : state->buf_0;
 	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
 	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
-	u32 *sh_desc = ctx->sh_desc_digest, *desc;
-	dma_addr_t ptr = ctx->sh_desc_digest_dma;
+	u32 *desc;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
 	struct ahash_edesc *edesc;
-	int ret = 0;
-	int sh_len;
+	int ret;
 
 
 	/* allocate space for base edesc and hw desc commands, link tables */
 	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN, GFP_DMA | flags);
-	if (!edesc) {
-		dev_err(jrdev, "could not allocate extended descriptor\n");
+	edesc = ahash_edesc_alloc(ctx, 0, ctx->sh_desc_digest,
+				  ctx->sh_desc_digest_dma, flags);
+	if (!edesc)
 		return -ENOMEM;
 		return -ENOMEM;
-	}
 
 
-	edesc->sec4_sg_bytes = 0;
-	sh_len = desc_len(sh_desc);
 	desc = edesc->hw_desc;
 	desc = edesc->hw_desc;
-	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
 
 	state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
 	state->buf_dma = dma_map_single(jrdev, buf, buflen, DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, state->buf_dma)) {
 	if (dma_mapping_error(jrdev, state->buf_dma)) {
 		dev_err(jrdev, "unable to map src\n");
 		dev_err(jrdev, "unable to map src\n");
-		return -ENOMEM;
+		goto unmap;
 	}
 	}
 
 
 	append_seq_in_ptr(desc, state->buf_dma, buflen, 0);
 	append_seq_in_ptr(desc, state->buf_dma, buflen, 0);
@@ -1199,7 +1240,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 						digestsize);
 						digestsize);
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 		dev_err(jrdev, "unable to map dst\n");
 		dev_err(jrdev, "unable to map dst\n");
-		return -ENOMEM;
+		goto unmap;
 	}
 	}
 	edesc->src_nents = 0;
 	edesc->src_nents = 0;
 
 
@@ -1217,6 +1258,11 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 	}
 	}
 
 
 	return ret;
 	return ret;
+ unmap:
+	ahash_unmap(jrdev, edesc, req, digestsize);
+	kfree(edesc);
+	return -ENOMEM;
+
 }
 }
 
 
 /* submit ahash update if it the first job descriptor after update */
 /* submit ahash update if it the first job descriptor after update */
@@ -1234,48 +1280,58 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 	int *next_buflen = state->current_buf ? &state->buflen_0 :
 	int *next_buflen = state->current_buf ? &state->buflen_0 :
 			   &state->buflen_1;
 			   &state->buflen_1;
 	int in_len = *buflen + req->nbytes, to_hash;
 	int in_len = *buflen + req->nbytes, to_hash;
-	int sec4_sg_bytes, src_nents;
+	int sec4_sg_bytes, src_nents, mapped_nents;
 	struct ahash_edesc *edesc;
 	struct ahash_edesc *edesc;
-	u32 *desc, *sh_desc = ctx->sh_desc_update_first;
-	dma_addr_t ptr = ctx->sh_desc_update_first_dma;
+	u32 *desc;
 	int ret = 0;
 	int ret = 0;
-	int sh_len;
 
 
 	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
 	*next_buflen = in_len & (crypto_tfm_alg_blocksize(&ahash->base) - 1);
 	to_hash = in_len - *next_buflen;
 	to_hash = in_len - *next_buflen;
 
 
 	if (to_hash) {
 	if (to_hash) {
 		src_nents = sg_nents_for_len(req->src,
 		src_nents = sg_nents_for_len(req->src,
-					     req->nbytes - (*next_buflen));
+					     req->nbytes - *next_buflen);
 		if (src_nents < 0) {
 		if (src_nents < 0) {
 			dev_err(jrdev, "Invalid number of src SG.\n");
 			dev_err(jrdev, "Invalid number of src SG.\n");
 			return src_nents;
 			return src_nents;
 		}
 		}
-		sec4_sg_bytes = (1 + src_nents) *
+
+		if (src_nents) {
+			mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+						  DMA_TO_DEVICE);
+			if (!mapped_nents) {
+				dev_err(jrdev, "unable to DMA map source\n");
+				return -ENOMEM;
+			}
+		} else {
+			mapped_nents = 0;
+		}
+
+		sec4_sg_bytes = (1 + mapped_nents) *
 				sizeof(struct sec4_sg_entry);
 				sizeof(struct sec4_sg_entry);
 
 
 		/*
 		/*
 		 * allocate space for base edesc and hw desc commands,
 		 * allocate space for base edesc and hw desc commands,
 		 * link tables
 		 * link tables
 		 */
 		 */
-		edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-				sec4_sg_bytes, GFP_DMA | flags);
+		edesc = ahash_edesc_alloc(ctx, 1 + mapped_nents,
+					  ctx->sh_desc_update_first,
+					  ctx->sh_desc_update_first_dma,
+					  flags);
 		if (!edesc) {
 		if (!edesc) {
-			dev_err(jrdev,
-				"could not allocate extended descriptor\n");
+			dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
 			return -ENOMEM;
 			return -ENOMEM;
 		}
 		}
 
 
 		edesc->src_nents = src_nents;
 		edesc->src_nents = src_nents;
 		edesc->sec4_sg_bytes = sec4_sg_bytes;
 		edesc->sec4_sg_bytes = sec4_sg_bytes;
-		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-				 DESC_JOB_IO_LEN;
 		edesc->dst_dma = 0;
 		edesc->dst_dma = 0;
 
 
 		state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
 		state->buf_dma = buf_map_to_sec4_sg(jrdev, edesc->sec4_sg,
 						    buf, *buflen);
 						    buf, *buflen);
-		src_map_to_sec4_sg(jrdev, req->src, src_nents,
-				   edesc->sec4_sg + 1);
+		sg_to_sec4_sg_last(req->src, mapped_nents,
+				   edesc->sec4_sg + 1, 0);
+
 		if (*next_buflen) {
 		if (*next_buflen) {
 			scatterwalk_map_and_copy(next_buf, req->src,
 			scatterwalk_map_and_copy(next_buf, req->src,
 						 to_hash - *buflen,
 						 to_hash - *buflen,
@@ -1284,24 +1340,22 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 
 
 		state->current_buf = !state->current_buf;
 		state->current_buf = !state->current_buf;
 
 
-		sh_len = desc_len(sh_desc);
 		desc = edesc->hw_desc;
-		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-				     HDR_REVERSE);
 
 		edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 						    sec4_sg_bytes,
 						    DMA_TO_DEVICE);
 		if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 			dev_err(jrdev, "unable to map S/G table\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto unmap_ctx;
 		}
 
 		append_seq_in_ptr(desc, edesc->sec4_sg_dma, to_hash, LDST_SGF);
 
 		ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
 		if (ret)
-			return ret;
+			goto unmap_ctx;
 
 #ifdef DEBUG
 		print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
@@ -1310,16 +1364,13 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 #endif
 
 		ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
-		if (!ret) {
-			ret = -EINPROGRESS;
-			state->update = ahash_update_ctx;
-			state->finup = ahash_finup_ctx;
-			state->final = ahash_final_ctx;
-		} else {
-			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-					DMA_TO_DEVICE);
-			kfree(edesc);
-		}
+		if (ret)
+			goto unmap_ctx;
+
+		ret = -EINPROGRESS;
+		state->update = ahash_update_ctx;
+		state->finup = ahash_finup_ctx;
+		state->final = ahash_final_ctx;
 	} else if (*next_buflen) {
 		scatterwalk_map_and_copy(buf + *buflen, req->src, 0,
 					 req->nbytes, 0);
@@ -1334,6 +1385,10 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 		       *next_buflen, 1);
 #endif
 
+	return ret;
+ unmap_ctx:
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+	kfree(edesc);
 	return ret;
 }
 
@@ -1350,61 +1405,63 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	int buflen = state->current_buf ? state->buflen_1 : state->buflen_0;
 	int last_buflen = state->current_buf ? state->buflen_0 :
 			  state->buflen_1;
-	u32 *sh_desc = ctx->sh_desc_digest, *desc;
-	dma_addr_t ptr = ctx->sh_desc_digest_dma;
-	int sec4_sg_bytes, sec4_sg_src_index, src_nents;
+	u32 *desc;
+	int sec4_sg_bytes, sec4_sg_src_index, src_nents, mapped_nents;
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct ahash_edesc *edesc;
-	int sh_len;
-	int ret = 0;
+	int ret;
 
 	src_nents = sg_nents_for_len(req->src, req->nbytes);
 	if (src_nents < 0) {
 		dev_err(jrdev, "Invalid number of src SG.\n");
 		return src_nents;
 	}
+
+	if (src_nents) {
+		mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+					  DMA_TO_DEVICE);
+		if (!mapped_nents) {
+			dev_err(jrdev, "unable to DMA map source\n");
+			return -ENOMEM;
+		}
+	} else {
+		mapped_nents = 0;
+	}
+
 	sec4_sg_src_index = 2;
-	sec4_sg_bytes = (sec4_sg_src_index + src_nents) *
+	sec4_sg_bytes = (sec4_sg_src_index + mapped_nents) *
 			 sizeof(struct sec4_sg_entry);
 
 	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN + sec4_sg_bytes,
-			GFP_DMA | flags);
+	edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
+				  ctx->sh_desc_digest, ctx->sh_desc_digest_dma,
+				  flags);
 	if (!edesc) {
-		dev_err(jrdev, "could not allocate extended descriptor\n");
+		dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
 		return -ENOMEM;
 	}
 
-	sh_len = desc_len(sh_desc);
 	desc = edesc->hw_desc;
-	init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
 
 	edesc->src_nents = src_nents;
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
-	edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-			 DESC_JOB_IO_LEN;
 
 	state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg, buf,
 						state->buf_dma, buflen,
 						last_buflen);
 
-	src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + 1);
-
-	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
-					    sec4_sg_bytes, DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
+	ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 1, buflen,
+				  req->nbytes);
+	if (ret) {
 		dev_err(jrdev, "unable to map S/G table\n");
-		return -ENOMEM;
+		goto unmap;
 	}
 
-	append_seq_in_ptr(desc, edesc->sec4_sg_dma, buflen +
-			       req->nbytes, LDST_SGF);
-
 	edesc->dst_dma = map_seq_out_ptr_result(desc, jrdev, req->result,
 						digestsize);
 	if (dma_mapping_error(jrdev, edesc->dst_dma)) {
 		dev_err(jrdev, "unable to map dst\n");
-		return -ENOMEM;
+		goto unmap;
 	}
 
 #ifdef DEBUG
@@ -1421,6 +1478,11 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 	}
 
 	return ret;
+ unmap:
+	ahash_unmap(jrdev, edesc, req, digestsize);
+	kfree(edesc);
+	return -ENOMEM;
+
 }
 
 /* submit first update job descriptor after init */
@@ -1436,78 +1498,65 @@ static int ahash_update_first(struct ahash_request *req)
 	int *next_buflen = state->current_buf ?
 		&state->buflen_1 : &state->buflen_0;
 		&state->buflen_1 : &state->buflen_0;
 	int to_hash;
-	u32 *sh_desc = ctx->sh_desc_update_first, *desc;
-	dma_addr_t ptr = ctx->sh_desc_update_first_dma;
-	int sec4_sg_bytes, src_nents;
-	dma_addr_t src_dma;
-	u32 options;
+	u32 *desc;
+	int src_nents, mapped_nents;
 	struct ahash_edesc *edesc;
 	int ret = 0;
-	int sh_len;
 
 	*next_buflen = req->nbytes & (crypto_tfm_alg_blocksize(&ahash->base) -
 				      1);
 	to_hash = req->nbytes - *next_buflen;
 
 	if (to_hash) {
-		src_nents = sg_count(req->src, req->nbytes - (*next_buflen));
+		src_nents = sg_nents_for_len(req->src,
+					     req->nbytes - *next_buflen);
 		if (src_nents < 0) {
 			dev_err(jrdev, "Invalid number of src SG.\n");
 			return src_nents;
 		}
-		dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE);
-		sec4_sg_bytes = src_nents * sizeof(struct sec4_sg_entry);
+
+		if (src_nents) {
+			mapped_nents = dma_map_sg(jrdev, req->src, src_nents,
+						  DMA_TO_DEVICE);
+			if (!mapped_nents) {
+				dev_err(jrdev, "unable to map source for DMA\n");
+				return -ENOMEM;
+			}
+		} else {
+			mapped_nents = 0;
+		}
 
 		/*
 		 * allocate space for base edesc and hw desc commands,
 		 * link tables
 		 */
-		edesc = kzalloc(sizeof(*edesc) + DESC_JOB_IO_LEN +
-				sec4_sg_bytes, GFP_DMA | flags);
+		edesc = ahash_edesc_alloc(ctx, mapped_nents > 1 ?
+					  mapped_nents : 0,
+					  ctx->sh_desc_update_first,
+					  ctx->sh_desc_update_first_dma,
+					  flags);
 		if (!edesc) {
-			dev_err(jrdev,
-				"could not allocate extended descriptor\n");
+			dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
 			return -ENOMEM;
 		}
 
 		edesc->src_nents = src_nents;
-		edesc->sec4_sg_bytes = sec4_sg_bytes;
-		edesc->sec4_sg = (void *)edesc + sizeof(struct ahash_edesc) +
-				 DESC_JOB_IO_LEN;
 		edesc->dst_dma = 0;
 
-		if (src_nents) {
-			sg_to_sec4_sg_last(req->src, src_nents,
-					   edesc->sec4_sg, 0);
-			edesc->sec4_sg_dma = dma_map_single(jrdev,
-							    edesc->sec4_sg,
-							    sec4_sg_bytes,
-							    DMA_TO_DEVICE);
-			if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
-				dev_err(jrdev, "unable to map S/G table\n");
-				return -ENOMEM;
-			}
-			src_dma = edesc->sec4_sg_dma;
-			options = LDST_SGF;
-		} else {
-			src_dma = sg_dma_address(req->src);
-			options = 0;
-		}
+		ret = ahash_edesc_add_src(ctx, edesc, req, mapped_nents, 0, 0,
+					  to_hash);
+		if (ret)
+			goto unmap_ctx;
 
 		if (*next_buflen)
 			scatterwalk_map_and_copy(next_buf, req->src, to_hash,
 						 *next_buflen, 0);
 
-		sh_len = desc_len(sh_desc);
 		desc = edesc->hw_desc;
-		init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER |
-				     HDR_REVERSE);
-
-		append_seq_in_ptr(desc, src_dma, to_hash, options);
 
 		ret = map_seq_out_ptr_ctx(desc, jrdev, state, ctx->ctx_len);
 		if (ret)
-			return ret;
+			goto unmap_ctx;
 
 #ifdef DEBUG
 		print_hex_dump(KERN_ERR, "jobdesc@"__stringify(__LINE__)": ",
@@ -1515,18 +1564,14 @@ static int ahash_update_first(struct ahash_request *req)
 			       desc_bytes(desc), 1);
 #endif
 
-		ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst,
-				      req);
-		if (!ret) {
-			ret = -EINPROGRESS;
-			state->update = ahash_update_ctx;
-			state->finup = ahash_finup_ctx;
-			state->final = ahash_final_ctx;
-		} else {
-			ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len,
-					DMA_TO_DEVICE);
-			kfree(edesc);
-		}
+		ret = caam_jr_enqueue(jrdev, desc, ahash_done_ctx_dst, req);
+		if (ret)
+			goto unmap_ctx;
+
+		ret = -EINPROGRESS;
+		state->update = ahash_update_ctx;
+		state->finup = ahash_finup_ctx;
+		state->final = ahash_final_ctx;
 	} else if (*next_buflen) {
 		state->update = ahash_update_no_ctx;
 		state->finup = ahash_finup_no_ctx;
@@ -1540,6 +1585,10 @@ static int ahash_update_first(struct ahash_request *req)
 		       *next_buflen, 1);
 #endif
 
+	return ret;
+ unmap_ctx:
+	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_TO_DEVICE);
+	kfree(edesc);
 	return ret;
 }
 
@@ -1799,7 +1848,6 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 					 HASH_MSG_LEN + SHA256_DIGEST_SIZE,
 					 HASH_MSG_LEN + 64,
 					 HASH_MSG_LEN + SHA512_DIGEST_SIZE };
-	int ret = 0;
 
 	/*
 	 * Get a Job ring from Job Ring driver to ensure in-order
@@ -1819,10 +1867,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct caam_hash_state));
-
-	ret = ahash_set_sh_desc(ahash);
-
-	return ret;
+	return ahash_set_sh_desc(ahash);
 }
 
 static void caam_hash_cra_exit(struct crypto_tfm *tfm)

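The caamhash.c hunks above all converge on one error-handling shape: allocate the extended descriptor, map the DMA resources, and on any failure fall through a single unmap_ctx/unmap label that unmaps and frees before returning, instead of repeating the cleanup at every exit. A minimal, self-contained sketch of that single-exit unwind idiom is shown below; the helper names are hypothetical placeholders, not functions from the CAAM driver.

	#include <stdio.h>
	#include <stdlib.h>

	/* Hypothetical stand-ins for the driver's map/enqueue steps. */
	static int map_resource(void *ctx)    { (void)ctx; return 0; }
	static void unmap_resource(void *ctx) { (void)ctx; }
	static int enqueue_job(void *ctx)     { (void)ctx; return -1; /* force the error path */ }

	static int submit(void)
	{
		void *edesc = malloc(64);
		int ret;

		if (!edesc)
			return -1;

		ret = map_resource(edesc);
		if (ret)
			goto unmap;

		ret = enqueue_job(edesc);
		if (ret)
			goto unmap;

		return 0;	/* ownership passes to the completion path */

	 unmap:
		unmap_resource(edesc);
		free(edesc);
		return ret;
	}

	int main(void)
	{
		printf("submit() -> %d\n", submit());
		return 0;
	}

The point of the pattern is that every new failure case only needs a "goto unmap", so cleanup cannot silently drift out of sync between exit paths.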
+ 3 - 0
drivers/crypto/caam/ctrl.c

@@ -14,6 +14,7 @@
 #include "jr.h"
 #include "desc_constr.h"
 #include "error.h"
+#include "ctrl.h"
 
 bool caam_little_end;
 EXPORT_SYMBOL(caam_little_end);
@@ -826,6 +827,8 @@ static int caam_probe(struct platform_device *pdev)
 
 caam_remove:
 	caam_remove(pdev);
+	return ret;
+
 iounmap_ctrl:
 	iounmap(ctrl);
 disable_caam_emi_slow:

+ 0 - 6
drivers/crypto/caam/desc.h

@@ -23,13 +23,7 @@
 #define SEC4_SG_OFFSET_MASK	0x00001fff
 
 struct sec4_sg_entry {
-#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \
-	defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
-	u32 rsvd1;
-	dma_addr_t ptr;
-#else
 	u64 ptr;
-#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */
 	u32 len;
 	u32 bpid_offset;
 };

+ 17 - 0
drivers/crypto/caam/desc_constr.h

@@ -324,6 +324,23 @@ static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \
 }
 APPEND_CMD_RAW_IMM(load, LOAD, u32);
 
+/*
+ * ee - endianness
+ * size - size of immediate type in bytes
+ */
+#define APPEND_CMD_RAW_IMM2(cmd, op, ee, size) \
+static inline void append_##cmd##_imm_##ee##size(u32 *desc, \
+						   u##size immediate, \
+						   u32 options) \
+{ \
+	__##ee##size data = cpu_to_##ee##size(immediate); \
+	PRINT_POS; \
+	append_cmd(desc, CMD_##op | IMMEDIATE | options | sizeof(data)); \
+	append_data(desc, &data, sizeof(data)); \
+}
+
+APPEND_CMD_RAW_IMM2(load, LOAD, be, 32);
+
 /*
  * Append math command. Only the last part of destination and source need to
  * be specified

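For reference, hand-expanding the new APPEND_CMD_RAW_IMM2(load, LOAD, be, 32) invocation above gives roughly the following inline helper. The real function is produced by the preprocessor; this expansion is shown only to make clear what the macro generates:

	static inline void append_load_imm_be32(u32 *desc, u32 immediate,
						u32 options)
	{
		__be32 data = cpu_to_be32(immediate);

		PRINT_POS;
		append_cmd(desc, CMD_LOAD | IMMEDIATE | options | sizeof(data));
		append_data(desc, &data, sizeof(data));
	}

In other words, the immediate is byte-swapped to big-endian before it is appended to the descriptor, which the plain APPEND_CMD_RAW_IMM variant does not do.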
+ 0 - 1
drivers/crypto/caam/intern.h

@@ -41,7 +41,6 @@ struct caam_drv_private_jr {
 	struct device		*dev;
 	int ridx;
 	struct caam_job_ring __iomem *rregs;	/* JobR's register space */
-	struct tasklet_struct irqtask;
 	int irq;			/* One per queue */
 
 	/* Number of scatterlist crypt transforms active on the JobR */

+ 10 - 16
drivers/crypto/caam/jr.c

@@ -73,8 +73,6 @@ static int caam_jr_shutdown(struct device *dev)
 
 	ret = caam_reset_hw_jr(dev);
 
-	tasklet_kill(&jrp->irqtask);
-
 	/* Release interrupt */
 	free_irq(jrp->irq, dev);
 
@@ -130,7 +128,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 
 	/*
 	 * Check the output ring for ready responses, kick
-	 * tasklet if jobs done.
+	 * the threaded irq if jobs done.
 	 */
 	irqstate = rd_reg32(&jrp->rregs->jrintstatus);
 	if (!irqstate)
@@ -152,18 +150,13 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 	/* Have valid interrupt at this point, just ACK and trigger */
 	wr_reg32(&jrp->rregs->jrintstatus, irqstate);
 
-	preempt_disable();
-	tasklet_schedule(&jrp->irqtask);
-	preempt_enable();
-
-	return IRQ_HANDLED;
+	return IRQ_WAKE_THREAD;
 }
 
-/* Deferred service handler, run as interrupt-fired tasklet */
-static void caam_jr_dequeue(unsigned long devarg)
+static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
 {
 	int hw_idx, sw_idx, i, head, tail;
-	struct device *dev = (struct device *)devarg;
+	struct device *dev = st_dev;
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
 	void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
 	u32 *userdesc, userstatus;
@@ -237,6 +230,8 @@ static void caam_jr_dequeue(unsigned long devarg)
 
 	/* reenable / unmask IRQs */
 	clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
+
+	return IRQ_HANDLED;
 }
 
 /**
@@ -394,11 +389,10 @@ static int caam_jr_init(struct device *dev)
 
 	jrp = dev_get_drvdata(dev);
 
-	tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
-
 	/* Connect job ring interrupt handler. */
-	error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
-			    dev_name(dev), dev);
+	error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
+				     caam_jr_threadirq, IRQF_SHARED,
+				     dev_name(dev), dev);
 	if (error) {
 		dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
 			jrp->ridx, jrp->irq);
@@ -460,7 +454,6 @@ out_free_inpring:
 out_free_irq:
 	free_irq(jrp->irq, dev);
 out_kill_deq:
-	tasklet_kill(&jrp->irqtask);
 	return error;
 }
 
@@ -513,6 +506,7 @@ static int caam_jr_probe(struct platform_device *pdev)
 	error = caam_jr_init(jrdev); /* now turn on hardware */
 	if (error) {
 		irq_dispose_mapping(jrpriv->irq);
+		iounmap(ctrl);
 		return error;
 	}
 

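The jr.c changes above replace the tasklet with a threaded interrupt: the hard handler only acknowledges the job ring and returns IRQ_WAKE_THREAD, and the body of the former tasklet runs as the IRQ thread, where it may sleep. A minimal sketch of that pattern follows; my_hw_pending(), my_ack() and my_process_completions() are hypothetical placeholders, not CAAM functions.

	/* Hard handler: runs in interrupt context, must be quick. */
	static irqreturn_t my_hardirq(int irq, void *dev_id)
	{
		if (!my_hw_pending(dev_id))
			return IRQ_NONE;	/* not ours (shared line) */

		my_ack(dev_id);			/* quiesce the source */
		return IRQ_WAKE_THREAD;		/* defer the real work */
	}

	/* Thread function: runs in a sleepable kernel thread. */
	static irqreturn_t my_threadfn(int irq, void *dev_id)
	{
		my_process_completions(dev_id);
		return IRQ_HANDLED;
	}

	/* At probe time, instead of request_irq() + tasklet_init(): */
	ret = request_threaded_irq(irq, my_hardirq, my_threadfn,
				   IRQF_SHARED, dev_name(dev), dev);

Because the core manages the thread, the driver no longer needs tasklet_kill() on the teardown paths, which is exactly what the removed lines above reflect.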
+ 8 - 0
drivers/crypto/caam/regs.h

@@ -196,6 +196,14 @@ static inline u64 rd_reg64(void __iomem *reg)
 #define caam_dma_to_cpu(value) caam32_to_cpu(value)
 #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT  */
 
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
+#define cpu_to_caam_dma64(value) \
+		(((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
+		 (u64)cpu_to_caam32(upper_32_bits(value)))
+#else
+#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
+#endif
+
 /*
  * jr_outentry
  * Represents each entry in a JobR output ring

+ 1 - 1
drivers/crypto/caam/sg_sw_sec4.h

@@ -15,7 +15,7 @@ struct sec4_sg_entry;
 static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
 				      dma_addr_t dma, u32 len, u16 offset)
 {
-	sec4_sg_ptr->ptr = cpu_to_caam_dma(dma);
+	sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
 	sec4_sg_ptr->len = cpu_to_caam32(len);
 	sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
 #ifdef DEBUG

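Taken together, the desc.h, regs.h and sg_sw_sec4.h hunks make the S/G entry pointer an unconditional u64 and move the i.MX quirk into cpu_to_caam_dma64(): on CONFIG_CRYPTO_DEV_FSL_CAAM_IMX the two 32-bit halves are swapped as the value is composed, so the little-endian ARM core still ends up writing the 32-bit address into the second word of the entry, where the removed { u32 rsvd1; dma_addr_t ptr; } layout used to place it. A worked example, purely illustrative:

	/* On a 32-bit i.MX platform, a DMA address such as 0x12345678 becomes
	 *
	 *   cpu_to_caam_dma64(0x12345678)
	 *     == ((u64)cpu_to_caam32(0x12345678) << 32)
	 *        | (u64)cpu_to_caam32(0x00000000)
	 *
	 * i.e. the address sits in the upper half of the composed u64 and the
	 * lower half is zero, matching the word order the hardware expects,
	 * without any #ifdef'ed struct layout.
	 */
	sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);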
+ 1 - 0
drivers/crypto/ccp/Makefile

@@ -2,6 +2,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
 ccp-objs := ccp-dev.o \
 	    ccp-ops.o \
 	    ccp-dev-v3.o \
+	    ccp-dev-v5.o \
 	    ccp-platform.o \
 	    ccp-dmaengine.o
 ccp-$(CONFIG_PCI) += ccp-pci.o

+ 17 - 1
drivers/crypto/ccp/ccp-crypto-sha.c

@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -134,7 +135,22 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
 	rctx->cmd.engine = CCP_ENGINE_SHA;
 	rctx->cmd.u.sha.type = rctx->type;
 	rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
-	rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
+
+	switch (rctx->type) {
+	case CCP_SHA_TYPE_1:
+		rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_224:
+		rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_256:
+		rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE;
+		break;
+	default:
+		/* Should never get here */
+		break;
+	}
+
 	rctx->cmd.u.sha.src = sg;
 	rctx->cmd.u.sha.src_len = rctx->hash_cnt;
 	rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?

+ 108 - 74
drivers/crypto/ccp/ccp-dev-v3.c

@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +20,61 @@
 
 #include "ccp-dev.h"
 
+static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+	int start;
+	struct ccp_device *ccp = cmd_q->ccp;
+
+	for (;;) {
+		mutex_lock(&ccp->sb_mutex);
+
+		start = (u32)bitmap_find_next_zero_area(ccp->sb,
+							ccp->sb_count,
+							ccp->sb_start,
+							count, 0);
+		if (start <= ccp->sb_count) {
+			bitmap_set(ccp->sb, start, count);
+
+			mutex_unlock(&ccp->sb_mutex);
+			break;
+		}
+
+		ccp->sb_avail = 0;
+
+		mutex_unlock(&ccp->sb_mutex);
+
+		/* Wait for KSB entries to become available */
+		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+			return 0;
+	}
+
+	return KSB_START + start;
+}
+
+static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start,
+			 unsigned int count)
+{
+	struct ccp_device *ccp = cmd_q->ccp;
+
+	if (!start)
+		return;
+
+	mutex_lock(&ccp->sb_mutex);
+
+	bitmap_clear(ccp->sb, start - KSB_START, count);
+
+	ccp->sb_avail = 1;
+
+	mutex_unlock(&ccp->sb_mutex);
+
+	wake_up_interruptible_all(&ccp->sb_queue);
+}
+
+static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+	return CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+}
+
 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
 {
 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
@@ -68,6 +124,9 @@ static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
 			/* On error delete all related jobs from the queue */
 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
 			      | op->jobid;
+			if (cmd_q->cmd_error)
+				ccp_log_error(cmd_q->ccp,
+					      cmd_q->cmd_error);
 
 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
 
@@ -99,10 +158,10 @@ static int ccp_perform_aes(struct ccp_op *op)
 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
-		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+		| (op->sb_key << REQ1_KEY_KSB_SHIFT);
 	cr[1] = op->src.u.dma.length - 1;
 	cr[2] = ccp_addr_lo(&op->src.u.dma);
-	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 		| ccp_addr_hi(&op->src.u.dma);
 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -129,10 +188,10 @@ static int ccp_perform_xts_aes(struct ccp_op *op)
 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
-		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
+		| (op->sb_key << REQ1_KEY_KSB_SHIFT);
 	cr[1] = op->src.u.dma.length - 1;
 	cr[2] = ccp_addr_lo(&op->src.u.dma);
-	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 		| ccp_addr_hi(&op->src.u.dma);
 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -158,7 +217,7 @@ static int ccp_perform_sha(struct ccp_op *op)
 		| REQ1_INIT;
 	cr[1] = op->src.u.dma.length - 1;
 	cr[2] = ccp_addr_lo(&op->src.u.dma);
-	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 		| ccp_addr_hi(&op->src.u.dma);
 
@@ -181,11 +240,11 @@ static int ccp_perform_rsa(struct ccp_op *op)
 	/* Fill out the register contents for REQ1 through REQ6 */
 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
-		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
+		| (op->sb_key << REQ1_KEY_KSB_SHIFT)
 		| REQ1_EOM;
 	cr[1] = op->u.rsa.input_len - 1;
 	cr[2] = ccp_addr_lo(&op->src.u.dma);
-	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT)
 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
 		| ccp_addr_hi(&op->src.u.dma);
 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
@@ -215,10 +274,10 @@ static int ccp_perform_passthru(struct ccp_op *op)
 			| ccp_addr_hi(&op->src.u.dma);
 
 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
-			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
+			cr[3] |= (op->sb_key << REQ4_KSB_SHIFT);
 	} else {
-		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
-		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
+		cr[2] = op->src.u.sb * CCP_SB_BYTES;
+		cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT);
 	}
 
 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
@@ -226,8 +285,8 @@ static int ccp_perform_passthru(struct ccp_op *op)
 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
 			| ccp_addr_hi(&op->dst.u.dma);
 	} else {
-		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
-		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
+		cr[4] = op->dst.u.sb * CCP_SB_BYTES;
+		cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT);
 	}
 
 	if (op->eom)
@@ -256,35 +315,6 @@ static int ccp_perform_ecc(struct ccp_op *op)
 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 }
 
-static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
-{
-	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
-	u32 trng_value;
-	int len = min_t(int, sizeof(trng_value), max);
-
-	/*
-	 * Locking is provided by the caller so we can update device
-	 * hwrng-related fields safely
-	 */
-	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
-	if (!trng_value) {
-		/* Zero is returned if not data is available or if a
-		 * bad-entropy error is present. Assume an error if
-		 * we exceed TRNG_RETRIES reads of zero.
-		 */
-		if (ccp->hwrng_retries++ > TRNG_RETRIES)
-			return -EIO;
-
-		return 0;
-	}
-
-	/* Reset the counter and save the rng value */
-	ccp->hwrng_retries = 0;
-	memcpy(data, &trng_value, len);
-
-	return len;
-}
-
 static int ccp_init(struct ccp_device *ccp)
 {
 	struct device *dev = ccp->dev;
@@ -321,9 +351,9 @@ static int ccp_init(struct ccp_device *ccp)
 		cmd_q->dma_pool = dma_pool;
 
 		/* Reserve 2 KSB regions for the queue */
-		cmd_q->ksb_key = KSB_START + ccp->ksb_start++;
-		cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++;
-		ccp->ksb_count -= 2;
+		cmd_q->sb_key = KSB_START + ccp->sb_start++;
+		cmd_q->sb_ctx = KSB_START + ccp->sb_start++;
+		ccp->sb_count -= 2;
 
 		/* Preset some register values and masks that are queue
 		 * number dependent
@@ -335,7 +365,7 @@ static int ccp_init(struct ccp_device *ccp)
 		cmd_q->int_ok = 1 << (i * 2);
 		cmd_q->int_err = 1 << ((i * 2) + 1);
 
-		cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+		cmd_q->free_slots = ccp_get_free_slots(cmd_q);
 
 		init_waitqueue_head(&cmd_q->int_queue);
 		init_waitqueue_head(&cmd_q->int_queue);
 
@@ -375,9 +405,10 @@ static int ccp_init(struct ccp_device *ccp)
 	}
 
 	/* Initialize the queues used to wait for KSB space and suspend */
-	init_waitqueue_head(&ccp->ksb_queue);
+	init_waitqueue_head(&ccp->sb_queue);
 	init_waitqueue_head(&ccp->suspend_queue);
 	init_waitqueue_head(&ccp->suspend_queue);
 
+	dev_dbg(dev, "Starting threads...\n");
 	/* Create a kthread for each queue */
 	for (i = 0; i < ccp->cmd_q_count; i++) {
 		struct task_struct *kthread;
@@ -397,29 +428,26 @@ static int ccp_init(struct ccp_device *ccp)
 		wake_up_process(kthread);
 	}
 
-	/* Register the RNG */
-	ccp->hwrng.name = ccp->rngname;
-	ccp->hwrng.read = ccp_trng_read;
-	ret = hwrng_register(&ccp->hwrng);
-	if (ret) {
-		dev_err(dev, "error registering hwrng (%d)\n", ret);
+	dev_dbg(dev, "Enabling interrupts...\n");
+	/* Enable interrupts */
+	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+
+	dev_dbg(dev, "Registering device...\n");
+	ccp_add_device(ccp);
+
+	ret = ccp_register_rng(ccp);
+	if (ret)
 		goto e_kthread;
-	}
 
 	/* Register the DMA engine support */
 	ret = ccp_dmaengine_register(ccp);
 	if (ret)
 		goto e_hwrng;
 
-	ccp_add_device(ccp);
-
-	/* Enable interrupts */
-	iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
-
 	return 0;
 
 e_hwrng:
-	hwrng_unregister(&ccp->hwrng);
+	ccp_unregister_rng(ccp);
 
 e_kthread:
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -441,19 +469,14 @@ static void ccp_destroy(struct ccp_device *ccp)
 	struct ccp_cmd *cmd;
 	unsigned int qim, i;
 
-	/* Remove this device from the list of available units first */
-	ccp_del_device(ccp);
-
 	/* Unregister the DMA engine */
 	ccp_dmaengine_unregister(ccp);
 
 	/* Unregister the RNG */
-	hwrng_unregister(&ccp->hwrng);
+	ccp_unregister_rng(ccp);
 
-	/* Stop the queue kthreads */
-	for (i = 0; i < ccp->cmd_q_count; i++)
-		if (ccp->cmd_q[i].kthread)
-			kthread_stop(ccp->cmd_q[i].kthread);
+	/* Remove this device from the list of available units */
+	ccp_del_device(ccp);
 
 	/* Build queue interrupt mask (two interrupt masks per queue) */
 	qim = 0;
@@ -472,6 +495,11 @@ static void ccp_destroy(struct ccp_device *ccp)
 	}
 	iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
 
+	/* Stop the queue kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
 	ccp->free_irq(ccp);
 
 	for (i = 0; i < ccp->cmd_q_count; i++)
@@ -527,18 +555,24 @@ static irqreturn_t ccp_irq_handler(int irq, void *data)
 }
 
 static const struct ccp_actions ccp3_actions = {
-	.perform_aes = ccp_perform_aes,
-	.perform_xts_aes = ccp_perform_xts_aes,
-	.perform_sha = ccp_perform_sha,
-	.perform_rsa = ccp_perform_rsa,
-	.perform_passthru = ccp_perform_passthru,
-	.perform_ecc = ccp_perform_ecc,
+	.aes = ccp_perform_aes,
+	.xts_aes = ccp_perform_xts_aes,
+	.sha = ccp_perform_sha,
+	.rsa = ccp_perform_rsa,
+	.passthru = ccp_perform_passthru,
+	.ecc = ccp_perform_ecc,
+	.sballoc = ccp_alloc_ksb,
+	.sbfree = ccp_free_ksb,
 	.init = ccp_init,
 	.destroy = ccp_destroy,
+	.get_free_slots = ccp_get_free_slots,
 	.irqhandler = ccp_irq_handler,
 };
 
-struct ccp_vdata ccpv3 = {
+const struct ccp_vdata ccpv3 = {
 	.version = CCP_VERSION(3, 0),
+	.setup = NULL,
 	.perform = &ccp3_actions,
+	.bar = 2,
+	.offset = 0x20000,
 };

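The KSB/LSB handling factored out above (ccp_alloc_ksb()/ccp_free_ksb(), and ccp_lsb_alloc() in the new v5 file that follows) boils down to finding a run of free slots in a bitmap, marking it used, and sleeping on a wait queue when nothing is free. The toy program below shows only the find-and-mark part under simplified assumptions (a byte array instead of the kernel bitmap API, no mutex, no wait queue); it is not CCP driver code.

	#include <stdio.h>

	#define NSLOTS 32

	/* Find 'count' consecutive free slots, mark them used, return the
	 * start index, or -1 when no run is available (the driver would
	 * sleep on a wait queue and retry instead of giving up). */
	static int alloc_slots(unsigned char used[NSLOTS], int count)
	{
		int start, i;

		for (start = 0; start + count <= NSLOTS; start++) {
			for (i = 0; i < count && !used[start + i]; i++)
				;
			if (i == count) {		/* found a free run */
				for (i = 0; i < count; i++)
					used[start + i] = 1;
				return start;
			}
		}
		return -1;
	}

	static void free_slots(unsigned char used[NSLOTS], int start, int count)
	{
		while (count--)
			used[start + count] = 0;
	}

	int main(void)
	{
		unsigned char used[NSLOTS] = { 0 };
		int a = alloc_slots(used, 2);
		int b = alloc_slots(used, 2);

		printf("a=%d b=%d\n", a, b);		/* a=0 b=2 */
		free_slots(used, a, 2);
		printf("again=%d\n", alloc_slots(used, 2));	/* reuses 0 */
		return 0;
	}

The v5 code layers one extra policy on top of this: each queue first tries its private LSB region and only falls back to the shared bitmap when the private one is full.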
+ 1017 - 0
drivers/crypto/ccp/ccp-dev-v5.c

@@ -0,0 +1,1017 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+	struct ccp_device *ccp;
+	int start;
+
+	/* First look at the map for the queue */
+	if (cmd_q->lsb >= 0) {
+		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
+							LSB_SIZE,
+							0, count, 0);
+		if (start < LSB_SIZE) {
+			bitmap_set(cmd_q->lsbmap, start, count);
+			return start + cmd_q->lsb * LSB_SIZE;
+		}
+	}
+
+	/* No joy; try to get an entry from the shared blocks */
+	ccp = cmd_q->ccp;
+	for (;;) {
+		mutex_lock(&ccp->sb_mutex);
+
+		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
+							MAX_LSB_CNT * LSB_SIZE,
+							0,
+							count, 0);
+		if (start <= MAX_LSB_CNT * LSB_SIZE) {
+			bitmap_set(ccp->lsbmap, start, count);
+
+			mutex_unlock(&ccp->sb_mutex);
+			return start * LSB_ITEM_SIZE;
+		}
+
+		ccp->sb_avail = 0;
+
+		mutex_unlock(&ccp->sb_mutex);
+
+		/* Wait for KSB entries to become available */
+		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+			return 0;
+	}
+}
+
+static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
+			 unsigned int count)
+{
+	int lsbno = start / LSB_SIZE;
+
+	if (!start)
+		return;
+
+	if (cmd_q->lsb == lsbno) {
+		/* An entry from the private LSB */
+		bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
+	} else {
+		/* From the shared LSBs */
+		struct ccp_device *ccp = cmd_q->ccp;
+
+		mutex_lock(&ccp->sb_mutex);
+		bitmap_clear(ccp->lsbmap, start, count);
+		ccp->sb_avail = 1;
+		mutex_unlock(&ccp->sb_mutex);
+		wake_up_interruptible_all(&ccp->sb_queue);
+	}
+}
+
+/* CCP version 5: Union to define the function field (cmd_reg1/dword0) */
+union ccp_function {
+	struct {
+		u16 size:7;
+		u16 encrypt:1;
+		u16 mode:5;
+		u16 type:2;
+	} aes;
+	struct {
+		u16 size:7;
+		u16 encrypt:1;
+		u16 rsvd:5;
+		u16 type:2;
+	} aes_xts;
+	struct {
+		u16 rsvd1:10;
+		u16 type:4;
+		u16 rsvd2:1;
+	} sha;
+	struct {
+		u16 mode:3;
+		u16 size:12;
+	} rsa;
+	struct {
+		u16 byteswap:2;
+		u16 bitwise:3;
+		u16 reflect:2;
+		u16 rsvd:8;
+	} pt;
+	struct  {
+		u16 rsvd:13;
+	} zlib;
+	struct {
+		u16 size:10;
+		u16 type:2;
+		u16 mode:3;
+	} ecc;
+	u16 raw;
+};
+
+#define	CCP_AES_SIZE(p)		((p)->aes.size)
+#define	CCP_AES_ENCRYPT(p)	((p)->aes.encrypt)
+#define	CCP_AES_MODE(p)		((p)->aes.mode)
+#define	CCP_AES_TYPE(p)		((p)->aes.type)
+#define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
+#define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
+#define	CCP_SHA_TYPE(p)		((p)->sha.type)
+#define	CCP_RSA_SIZE(p)		((p)->rsa.size)
+#define	CCP_PT_BYTESWAP(p)	((p)->pt.byteswap)
+#define	CCP_PT_BITWISE(p)	((p)->pt.bitwise)
+#define	CCP_ECC_MODE(p)		((p)->ecc.mode)
+#define	CCP_ECC_AFFINE(p)	((p)->ecc.one)
+
+/* Word 0 */
+#define CCP5_CMD_DW0(p)		((p)->dw0)
+#define CCP5_CMD_SOC(p)		(CCP5_CMD_DW0(p).soc)
+#define CCP5_CMD_IOC(p)		(CCP5_CMD_DW0(p).ioc)
+#define CCP5_CMD_INIT(p)	(CCP5_CMD_DW0(p).init)
+#define CCP5_CMD_EOM(p)		(CCP5_CMD_DW0(p).eom)
+#define CCP5_CMD_FUNCTION(p)	(CCP5_CMD_DW0(p).function)
+#define CCP5_CMD_ENGINE(p)	(CCP5_CMD_DW0(p).engine)
+#define CCP5_CMD_PROT(p)	(CCP5_CMD_DW0(p).prot)
+
+/* Word 1 */
+#define CCP5_CMD_DW1(p)		((p)->length)
+#define CCP5_CMD_LEN(p)		(CCP5_CMD_DW1(p))
+
+/* Word 2 */
+#define CCP5_CMD_DW2(p)		((p)->src_lo)
+#define CCP5_CMD_SRC_LO(p)	(CCP5_CMD_DW2(p))
+
+/* Word 3 */
+#define CCP5_CMD_DW3(p)		((p)->dw3)
+#define CCP5_CMD_SRC_MEM(p)	((p)->dw3.src_mem)
+#define CCP5_CMD_SRC_HI(p)	((p)->dw3.src_hi)
+#define CCP5_CMD_LSB_ID(p)	((p)->dw3.lsb_cxt_id)
+#define CCP5_CMD_FIX_SRC(p)	((p)->dw3.fixed)
+
+/* Words 4/5 */
+#define CCP5_CMD_DW4(p)		((p)->dw4)
+#define CCP5_CMD_DST_LO(p)	(CCP5_CMD_DW4(p).dst_lo)
+#define CCP5_CMD_DW5(p)		((p)->dw5.fields.dst_hi)
+#define CCP5_CMD_DST_HI(p)	(CCP5_CMD_DW5(p))
+#define CCP5_CMD_DST_MEM(p)	((p)->dw5.fields.dst_mem)
+#define CCP5_CMD_FIX_DST(p)	((p)->dw5.fields.fixed)
+#define CCP5_CMD_SHA_LO(p)	((p)->dw4.sha_len_lo)
+#define CCP5_CMD_SHA_HI(p)	((p)->dw5.sha_len_hi)
+
+/* Word 6/7 */
+#define CCP5_CMD_DW6(p)		((p)->key_lo)
+#define CCP5_CMD_KEY_LO(p)	(CCP5_CMD_DW6(p))
+#define CCP5_CMD_DW7(p)		((p)->dw7)
+#define CCP5_CMD_KEY_HI(p)	((p)->dw7.key_hi)
+#define CCP5_CMD_KEY_MEM(p)	((p)->dw7.key_mem)
+
+static inline u32 low_address(unsigned long addr)
+{
+	return (u64)addr & 0x0ffffffff;
+}
+
+static inline u32 high_address(unsigned long addr)
+{
+	return ((u64)addr >> 32) & 0x00000ffff;
+}
+
+static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+	unsigned int head_idx, n;
+	u32 head_lo, queue_start;
+
+	queue_start = low_address(cmd_q->qdma_tail);
+	head_lo = ioread32(cmd_q->reg_head_lo);
+	head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
+
+	n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
+
+	return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
+}
+
+static int ccp5_do_cmd(struct ccp5_desc *desc,
+		       struct ccp_cmd_queue *cmd_q)
+{
+	u32 *mP;
+	__le32 *dP;
+	u32 tail;
+	int	i;
+	int ret = 0;
+
+	if (CCP5_CMD_SOC(desc)) {
+		CCP5_CMD_IOC(desc) = 1;
+		CCP5_CMD_SOC(desc) = 0;
+	}
+	mutex_lock(&cmd_q->q_mutex);
+
+	mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
+	dP = (__le32 *) desc;
+	for (i = 0; i < 8; i++)
+		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
+
+	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
+
+	/* The data used by this command must be flushed to memory */
+	wmb();
+
+	/* Write the new tail address back to the queue register */
+	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+	iowrite32(tail, cmd_q->reg_tail_lo);
+
+	/* Turn the queue back on using our cached control register */
+	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
+	mutex_unlock(&cmd_q->q_mutex);
+
+	if (CCP5_CMD_IOC(desc)) {
+		/* Wait for the job to complete */
+		ret = wait_event_interruptible(cmd_q->int_queue,
+					       cmd_q->int_rcvd);
+		if (ret || cmd_q->cmd_error) {
+			if (cmd_q->cmd_error)
+				ccp_log_error(cmd_q->ccp,
+					      cmd_q->cmd_error);
+			/* A version 5 device doesn't use Job IDs... */
+			if (!ret)
+				ret = -EIO;
+		}
+		cmd_q->int_rcvd = 0;
+	}
+
+	return 0;
+}
+
+static int ccp5_perform_aes(struct ccp_op *op)
+{
+	struct ccp5_desc desc;
+	union ccp_function function;
+	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+	/* Zero out all the fields of the command desc */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
+
+	CCP5_CMD_SOC(&desc) = op->soc;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = op->init;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_PROT(&desc) = 0;
+
+	function.raw = 0;
+	CCP_AES_ENCRYPT(&function) = op->u.aes.action;
+	CCP_AES_MODE(&function) = op->u.aes.mode;
+	CCP_AES_TYPE(&function) = op->u.aes.type;
+	if (op->u.aes.mode == CCP_AES_MODE_CFB)
+		CCP_AES_SIZE(&function) = 0x7f;
+
+	CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+	CCP5_CMD_KEY_HI(&desc) = 0;
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_xts_aes(struct ccp_op *op)
+{
+	struct ccp5_desc desc;
+	union ccp_function function;
+	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+	/* Zero out all the fields of the command desc */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
+
+	CCP5_CMD_SOC(&desc) = op->soc;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = op->init;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_PROT(&desc) = 0;
+
+	function.raw = 0;
+	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
+	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
+	CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+	CCP5_CMD_KEY_HI(&desc) =  0;
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_sha(struct ccp_op *op)
+{
+	struct ccp5_desc desc;
+	union ccp_function function;
+
+	/* Zero out all the fields of the command desc */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
+
+	CCP5_CMD_SOC(&desc) = op->soc;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = 1;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_PROT(&desc) = 0;
+
+	function.raw = 0;
+	CCP_SHA_TYPE(&function) = op->u.sha.type;
+	CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	if (op->eom) {
+		CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
+		CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
+	} else {
+		CCP5_CMD_SHA_LO(&desc) = 0;
+		CCP5_CMD_SHA_HI(&desc) = 0;
+	}
+
+	return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_rsa(struct ccp_op *op)
+{
+	struct ccp5_desc desc;
+	union ccp_function function;
+
+	/* Zero out all the fields of the command desc */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
+
+	CCP5_CMD_SOC(&desc) = op->soc;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = 1;
+	CCP5_CMD_PROT(&desc) = 0;
+
+	function.raw = 0;
+	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;
+	CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
+
+	/* Source is from external memory */
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Destination is in external memory */
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Key (Exponent) is in external memory */
+	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
+	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_passthru(struct ccp_op *op)
+{
+	struct ccp5_desc desc;
+	union ccp_function function;
+	struct ccp_dma_info *saddr = &op->src.u.dma;
+	struct ccp_dma_info *daddr = &op->dst.u.dma;
+
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
+
+	CCP5_CMD_SOC(&desc) = 0;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_PROT(&desc) = 0;
+
+	function.raw = 0;
+	CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
+	CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
+	CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+	/* Length of source data is always 256 bytes */
+	if (op->src.type == CCP_MEMTYPE_SYSTEM)
+		CCP5_CMD_LEN(&desc) = saddr->length;
+	else
+		CCP5_CMD_LEN(&desc) = daddr->length;
+
+	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
+	} else {
+		u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
+
+		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
+		CCP5_CMD_SRC_HI(&desc) = 0;
+		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
+	}
+
+	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+	} else {
+		u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
+
+		CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
+		CCP5_CMD_DST_HI(&desc) = 0;
+		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
+	}
+
+	return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp5_perform_ecc(struct ccp_op *op)
+{
+	struct ccp5_desc desc;
+	union ccp_function function;
+
+	/* Zero out all the fields of the command desc */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
+
+	CCP5_CMD_SOC(&desc) = 0;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = 1;
+	CCP5_CMD_PROT(&desc) = 0;
+
+	function.raw = 0;
+	function.ecc.mode = op->u.ecc.function;
+	CCP5_CMD_FUNCTION(&desc) = function.raw;
+
+	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
+
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
+static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
+{
+	int q_mask = 1 << cmd_q->id;
+	int queues = 0;
+	int j;
+
+	/* Build a bit mask to know which LSBs this queue has access to.
+	 * Don't bother with segment 0 as it has special privileges.
+	 */
+	for (j = 1; j < MAX_LSB_CNT; j++) {
+		if (status & q_mask)
+			bitmap_set(cmd_q->lsbmask, j, 1);
+		status >>= LSB_REGION_WIDTH;
+	}
+	queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
+	dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
+		 cmd_q->id, queues);
+
+	return queues ? 0 : -EINVAL;
+}
+
+
+static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
+					int lsb_cnt, int n_lsbs,
+					unsigned long *lsb_pub)
+{
+	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+	int bitno;
+	int qlsb_wgt;
+	int i;
+
+	/* For each queue:
+	 * If the count of potential LSBs available to a queue matches the
+	 * ordinal given to us in lsb_cnt:
+	 * Copy the mask of possible LSBs for this queue into "qlsb";
+	 * For each bit in qlsb, see if the corresponding bit in the
+	 * aggregation mask is set; if so, we have a match.
+	 *     If we have a match, clear the bit in the aggregation to
+	 *     mark it as no longer available.
+	 *     If there is no match, clear the bit in qlsb and keep looking.
+	 */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+		qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
+
+		if (qlsb_wgt == lsb_cnt) {
+			bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
+
+			bitno = find_first_bit(qlsb, MAX_LSB_CNT);
+			while (bitno < MAX_LSB_CNT) {
+				if (test_bit(bitno, lsb_pub)) {
+					/* We found an available LSB
+					 * that this queue can access
+					 */
+					cmd_q->lsb = bitno;
+					bitmap_clear(lsb_pub, bitno, 1);
+					dev_info(ccp->dev,
+						 "Queue %d gets LSB %d\n",
+						 i, bitno);
+					break;
+				}
+				bitmap_clear(qlsb, bitno, 1);
+				bitno = find_first_bit(qlsb, MAX_LSB_CNT);
+			}
+			if (bitno >= MAX_LSB_CNT)
+				return -EINVAL;
+			n_lsbs--;
+		}
+	}
+	return n_lsbs;
+}
+
+/* For each queue, from the most- to least-constrained:
+ * find an LSB that can be assigned to the queue. If there are N queues that
+ * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
+ * dedicated LSB. Remaining LSB regions become a shared resource.
+ * If we have fewer LSBs than queues, all LSB regions become shared resources.
+ */
+static int ccp_assign_lsbs(struct ccp_device *ccp)
+{
+	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
+	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+	int n_lsbs = 0;
+	int bitno;
+	int i, lsb_cnt;
+	int rc = 0;
+
+	bitmap_zero(lsb_pub, MAX_LSB_CNT);
+
+	/* Create an aggregate bitmap to get a total count of available LSBs */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		bitmap_or(lsb_pub,
+			  lsb_pub, ccp->cmd_q[i].lsbmask,
+			  MAX_LSB_CNT);
+
+	n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
+
+	if (n_lsbs >= ccp->cmd_q_count) {
+		/* We have enough LSBS to give every queue a private LSB.
+		 * Brute force search to start with the queues that are more
+		 * constrained in LSB choice. When an LSB is privately
+		 * assigned, it is removed from the public mask.
+		 * This is an ugly N squared algorithm with some optimization.
+		 */
+		for (lsb_cnt = 1;
+		     n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
+		     lsb_cnt++) {
+			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
+							  lsb_pub);
+			if (rc < 0)
+				return -EINVAL;
+			n_lsbs = rc;
+		}
+	}
+
+	rc = 0;
+	/* What's left of the LSBs, according to the public mask, now become
+	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
+	 * that can't be used as a shared resource, so mark the LSB slots for
+	 * them as "in use".
+	 */
+	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
+
+	bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
+	while (bitno < MAX_LSB_CNT) {
+		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
+		bitmap_set(qlsb, bitno, 1);
+		bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
+	}
+
+	return rc;
+}
+
+static int ccp5_init(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct dma_pool *dma_pool;
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	unsigned int qmr, qim, i;
+	u64 status;
+	u32 status_lo, status_hi;
+	int ret;
+
+	/* Find available queues */
+	qim = 0;
+	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	for (i = 0; i < MAX_HW_QUEUES; i++) {
+
+		if (!(qmr & (1 << i)))
+			continue;
+
+		/* Allocate a dma pool for this queue */
+		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
+			 ccp->name, i);
+		dma_pool = dma_pool_create(dma_pool_name, dev,
+					   CCP_DMAPOOL_MAX_SIZE,
+					   CCP_DMAPOOL_ALIGN, 0);
+		if (!dma_pool) {
+			dev_err(dev, "unable to allocate dma pool\n");
+			ret = -ENOMEM;
+		}
+
+		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+		ccp->cmd_q_count++;
+
+		cmd_q->ccp = ccp;
+		cmd_q->id = i;
+		cmd_q->dma_pool = dma_pool;
+		mutex_init(&cmd_q->q_mutex);
+
+		/* Page alignment satisfies our needs for N <= 128 */
+		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
+		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
+						   &cmd_q->qbase_dma,
+						   GFP_KERNEL);
+		if (!cmd_q->qbase) {
+			dev_err(dev, "unable to allocate command queue\n");
+			ret = -ENOMEM;
+			goto e_pool;
+		}
+
+		cmd_q->qidx = 0;
+		/* Preset some register values and masks that are queue
+		 * number dependent
+		 */
+		cmd_q->reg_control = ccp->io_regs +
+				     CMD5_Q_STATUS_INCR * (i + 1);
+		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
+		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
+		cmd_q->reg_int_enable = cmd_q->reg_control +
+					CMD5_Q_INT_ENABLE_BASE;
+		cmd_q->reg_interrupt_status = cmd_q->reg_control +
+					      CMD5_Q_INTERRUPT_STATUS_BASE;
+		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
+		cmd_q->reg_int_status = cmd_q->reg_control +
+					CMD5_Q_INT_STATUS_BASE;
+		cmd_q->reg_dma_status = cmd_q->reg_control +
+					CMD5_Q_DMA_STATUS_BASE;
+		cmd_q->reg_dma_read_status = cmd_q->reg_control +
+					     CMD5_Q_DMA_READ_STATUS_BASE;
+		cmd_q->reg_dma_write_status = cmd_q->reg_control +
+					      CMD5_Q_DMA_WRITE_STATUS_BASE;
+
+		init_waitqueue_head(&cmd_q->int_queue);
+
+		dev_dbg(dev, "queue #%u available\n", i);
+	}
+	if (ccp->cmd_q_count == 0) {
+		dev_notice(dev, "no command queues available\n");
+		ret = -EIO;
+		goto e_pool;
+	}
+	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+	/* Turn off the queues and disable interrupts until ready */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		cmd_q->qcontrol = 0; /* Start with nothing */
+		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+		/* Disable the interrupts */
+		iowrite32(0x00, cmd_q->reg_int_enable);
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+
+		/* Clear the interrupts */
+		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+	}
+
+	dev_dbg(dev, "Requesting an IRQ...\n");
+	/* Request an irq */
+	ret = ccp->get_irq(ccp);
+	if (ret) {
+		dev_err(dev, "unable to allocate an IRQ\n");
+		goto e_pool;
+	}
+
+	/* Initialize the queue used to suspend */
+	init_waitqueue_head(&ccp->suspend_queue);
+
+	dev_dbg(dev, "Loading LSB map...\n");
+	/* Copy the private LSB mask to the public registers */
+	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
+	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
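+	/* Each LSB region is described by LSB_REGION_WIDTH (5) mask bits;
+	 * the "lo" register carries the first six regions (30 bits) and the
+	 * "hi" register the remaining two, so merge them into one map.
+	 */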
+	status = ((u64)status_hi<<30) | (u64)status_lo;
+
+	dev_dbg(dev, "Configuring virtual queues...\n");
+	/* Configure size of each virtual queue accessible to host */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		u32 dma_addr_lo;
+		u32 dma_addr_hi;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
+		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
+
+		cmd_q->qdma_tail = cmd_q->qbase_dma;
+		dma_addr_lo = low_address(cmd_q->qdma_tail);
+		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
+		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
+
+		dma_addr_hi = high_address(cmd_q->qdma_tail);
+		cmd_q->qcontrol |= (dma_addr_hi << 16);
+		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+		/* Find the LSB regions accessible to the queue */
+		ccp_find_lsb_regions(cmd_q, status);
+		cmd_q->lsb = -1; /* Unassigned value */
+	}
+
+	dev_dbg(dev, "Assigning LSBs...\n");
+	ret = ccp_assign_lsbs(ccp);
+	if (ret) {
+		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
+		goto e_irq;
+	}
+
+	/* Optimization: pre-allocate LSB slots for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+	}
+
+	dev_dbg(dev, "Starting threads...\n");
+	/* Create a kthread for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct task_struct *kthread;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+					 "%s-q%u", ccp->name, cmd_q->id);
+		if (IS_ERR(kthread)) {
+			dev_err(dev, "error creating queue thread (%ld)\n",
+				PTR_ERR(kthread));
+			ret = PTR_ERR(kthread);
+			goto e_kthread;
+		}
+
+		cmd_q->kthread = kthread;
+		wake_up_process(kthread);
+	}
+
+	dev_dbg(dev, "Enabling interrupts...\n");
+	/* Enable interrupts */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+		iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
+	}
+
+	dev_dbg(dev, "Registering device...\n");
+	/* Put this on the unit list to make it available */
+	ccp_add_device(ccp);
+
+	ret = ccp_register_rng(ccp);
+	if (ret)
+		goto e_kthread;
+
+	/* Register the DMA engine support */
+	ret = ccp_dmaengine_register(ccp);
+	if (ret)
+		goto e_hwrng;
+
+	return 0;
+
+e_hwrng:
+	ccp_unregister_rng(ccp);
+
+e_kthread:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+e_irq:
+	ccp->free_irq(ccp);
+
+e_pool:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	return ret;
+}
+
+static void ccp5_destroy(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct ccp_cmd *cmd;
+	unsigned int i;
+
+	/* Unregister the DMA engine */
+	ccp_dmaengine_unregister(ccp);
+
+	/* Unregister the RNG */
+	ccp_unregister_rng(ccp);
+
+	/* Remove this device from the list of available units first */
+	ccp_del_device(ccp);
+
+	/* Disable and clear interrupts */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		/* Turn off the run bit */
+		iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
+
+		/* Clear the interrupt status */
+		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+
+		/* Disable the interrupts */
+		iowrite32(0x00, cmd_q->reg_int_enable);
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+
+	/* Stop the queue kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	ccp->free_irq(ccp);
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+				  cmd_q->qbase_dma);
+	}
+
+	/* Flush the cmd and backlog queue */
+	while (!list_empty(&ccp->cmd)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+	while (!list_empty(&ccp->backlog)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+}
+
+static irqreturn_t ccp5_irq_handler(int irq, void *data)
+{
+	struct device *dev = data;
+	struct ccp_device *ccp = dev_get_drvdata(dev);
+	u32 status;
+	unsigned int i;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+		status = ioread32(cmd_q->reg_interrupt_status);
+
+		if (status) {
+			cmd_q->int_status = status;
+			cmd_q->q_status = ioread32(cmd_q->reg_status);
+			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+			/* On error, only save the first error value */
+			if ((status & INT_ERROR) && !cmd_q->cmd_error)
+				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+			cmd_q->int_rcvd = 1;
+
+			/* Acknowledge the interrupt and wake the kthread */
+			iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
+			wake_up_interruptible(&cmd_q->int_queue);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void ccp5_config(struct ccp_device *ccp)
+{
+	/* Public side */
+	iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
+}
+
+static void ccp5other_config(struct ccp_device *ccp)
+{
+	int i;
+	u32 rnd;
+
+	/* We own all of the queues on the NTB CCP */
+
+	iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
+	iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
+	for (i = 0; i < 12; i++) {
+		rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
+		iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
+	}
+
+	iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
+	iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
+	iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
+
+	iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+	iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+
+	iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
+
+	ccp5_config(ccp);
+}
+
+/* Version 5 adds some function, but is essentially the same as v3 */
+static const struct ccp_actions ccp5_actions = {
+	.aes = ccp5_perform_aes,
+	.xts_aes = ccp5_perform_xts_aes,
+	.sha = ccp5_perform_sha,
+	.rsa = ccp5_perform_rsa,
+	.passthru = ccp5_perform_passthru,
+	.ecc = ccp5_perform_ecc,
+	.sballoc = ccp_lsb_alloc,
+	.sbfree = ccp_lsb_free,
+	.init = ccp5_init,
+	.destroy = ccp5_destroy,
+	.get_free_slots = ccp5_get_free_slots,
+	.irqhandler = ccp5_irq_handler,
+};
+
+const struct ccp_vdata ccpv5a = {
+	.version = CCP_VERSION(5, 0),
+	.setup = ccp5_config,
+	.perform = &ccp5_actions,
+	.bar = 2,
+	.offset = 0x0,
+};
+
+const struct ccp_vdata ccpv5b = {
+	.version = CCP_VERSION(5, 0),
+	.setup = ccp5other_config,
+	.perform = &ccp5_actions,
+	.bar = 2,
+	.offset = 0x0,
+};

+ 109 - 4
drivers/crypto/ccp/ccp-dev.c

@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -39,6 +40,59 @@ struct ccp_tasklet_data {
 	struct ccp_cmd *cmd;
 	struct ccp_cmd *cmd;
 };
 };
 
 
+/* Human-readable error strings */
+char *ccp_error_codes[] = {
+	"",
+	"ERR 01: ILLEGAL_ENGINE",
+	"ERR 02: ILLEGAL_KEY_ID",
+	"ERR 03: ILLEGAL_FUNCTION_TYPE",
+	"ERR 04: ILLEGAL_FUNCTION_MODE",
+	"ERR 05: ILLEGAL_FUNCTION_ENCRYPT",
+	"ERR 06: ILLEGAL_FUNCTION_SIZE",
+	"ERR 07: Zlib_MISSING_INIT_EOM",
+	"ERR 08: ILLEGAL_FUNCTION_RSVD",
+	"ERR 09: ILLEGAL_BUFFER_LENGTH",
+	"ERR 10: VLSB_FAULT",
+	"ERR 11: ILLEGAL_MEM_ADDR",
+	"ERR 12: ILLEGAL_MEM_SEL",
+	"ERR 13: ILLEGAL_CONTEXT_ID",
+	"ERR 14: ILLEGAL_KEY_ADDR",
+	"ERR 15: 0xF Reserved",
+	"ERR 16: Zlib_ILLEGAL_MULTI_QUEUE",
+	"ERR 17: Zlib_ILLEGAL_JOBID_CHANGE",
+	"ERR 18: CMD_TIMEOUT",
+	"ERR 19: IDMA0_AXI_SLVERR",
+	"ERR 20: IDMA0_AXI_DECERR",
+	"ERR 21: 0x15 Reserved",
+	"ERR 22: IDMA1_AXI_SLAVE_FAULT",
+	"ERR 23: IDMA1_AIXI_DECERR",
+	"ERR 24: 0x18 Reserved",
+	"ERR 25: ZLIBVHB_AXI_SLVERR",
+	"ERR 26: ZLIBVHB_AXI_DECERR",
+	"ERR 27: 0x1B Reserved",
+	"ERR 27: ZLIB_UNEXPECTED_EOM",
+	"ERR 27: ZLIB_EXTRA_DATA",
+	"ERR 30: ZLIB_BTYPE",
+	"ERR 31: ZLIB_UNDEFINED_SYMBOL",
+	"ERR 32: ZLIB_UNDEFINED_DISTANCE_S",
+	"ERR 33: ZLIB_CODE_LENGTH_SYMBOL",
+	"ERR 34: ZLIB _VHB_ILLEGAL_FETCH",
+	"ERR 35: ZLIB_UNCOMPRESSED_LEN",
+	"ERR 36: ZLIB_LIMIT_REACHED",
+	"ERR 37: ZLIB_CHECKSUM_MISMATCH0",
+	"ERR 38: ODMA0_AXI_SLVERR",
+	"ERR 39: ODMA0_AXI_DECERR",
+	"ERR 40: 0x28 Reserved",
+	"ERR 41: ODMA1_AXI_SLVERR",
+	"ERR 42: ODMA1_AXI_DECERR",
+	"ERR 43: LSB_PARITY_ERR",
+};
+
+void ccp_log_error(struct ccp_device *d, int e)
+{
+	dev_err(d->dev, "CCP error: %s (0x%x)\n", ccp_error_codes[e], e);
+}
+
 /* List of CCPs, CCP count, read-write access lock, and access functions
 /* List of CCPs, CCP count, read-write access lock, and access functions
  *
  *
  * Lock structure: get ccp_unit_lock for reading whenever we need to
  * Lock structure: get ccp_unit_lock for reading whenever we need to
@@ -58,7 +112,7 @@ static struct ccp_device *ccp_rr;
 
 
 /* Ever-increasing value to produce unique unit numbers */
 /* Ever-increasing value to produce unique unit numbers */
 static atomic_t ccp_unit_ordinal;
 static atomic_t ccp_unit_ordinal;
-unsigned int ccp_increment_unit_ordinal(void)
+static unsigned int ccp_increment_unit_ordinal(void)
 {
 {
 	return atomic_inc_return(&ccp_unit_ordinal);
 	return atomic_inc_return(&ccp_unit_ordinal);
 }
 }
@@ -118,6 +172,29 @@ void ccp_del_device(struct ccp_device *ccp)
 	write_unlock_irqrestore(&ccp_unit_lock, flags);
 	write_unlock_irqrestore(&ccp_unit_lock, flags);
 }
 }
 
 
+
+
+int ccp_register_rng(struct ccp_device *ccp)
+{
+	int ret = 0;
+
+	dev_dbg(ccp->dev, "Registering RNG...\n");
+	/* Register an RNG */
+	ccp->hwrng.name = ccp->rngname;
+	ccp->hwrng.read = ccp_trng_read;
+	ret = hwrng_register(&ccp->hwrng);
+	if (ret)
+		dev_err(ccp->dev, "error registering hwrng (%d)\n", ret);
+
+	return ret;
+}
+
+void ccp_unregister_rng(struct ccp_device *ccp)
+{
+	if (ccp->hwrng.name)
+		hwrng_unregister(&ccp->hwrng);
+}
+
 static struct ccp_device *ccp_get_device(void)
 static struct ccp_device *ccp_get_device(void)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
@@ -397,9 +474,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 
 
 	spin_lock_init(&ccp->cmd_lock);
 	spin_lock_init(&ccp->cmd_lock);
 	mutex_init(&ccp->req_mutex);
 	mutex_init(&ccp->req_mutex);
-	mutex_init(&ccp->ksb_mutex);
-	ccp->ksb_count = KSB_COUNT;
-	ccp->ksb_start = 0;
+	mutex_init(&ccp->sb_mutex);
+	ccp->sb_count = KSB_COUNT;
+	ccp->sb_start = 0;
 
 
 	ccp->ord = ccp_increment_unit_ordinal();
 	ccp->ord = ccp_increment_unit_ordinal();
 	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
 	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
@@ -408,6 +485,34 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
 	return ccp;
 	return ccp;
 }
 }
 
 
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+	u32 trng_value;
+	int len = min_t(int, sizeof(trng_value), max);
+
+	/* Locking is provided by the caller so we can update device
+	 * hwrng-related fields safely
+	 */
+	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
+	if (!trng_value) {
+		/* Zero is returned if no data is available or if a
+		 * bad-entropy error is present. Assume an error if
+		 * we exceed TRNG_RETRIES reads of zero.
+		 */
+		if (ccp->hwrng_retries++ > TRNG_RETRIES)
+			return -EIO;
+
+		return 0;
+	}
+
+	/* Reset the counter and save the rng value */
+	ccp->hwrng_retries = 0;
+	memcpy(data, &trng_value, len);
+
+	return len;
+}
+
 #ifdef CONFIG_PM
 #ifdef CONFIG_PM
 bool ccp_queues_suspended(struct ccp_device *ccp)
 bool ccp_queues_suspended(struct ccp_device *ccp)
 {
 {

+ 246 - 66
drivers/crypto/ccp/ccp-dev.h

@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -60,7 +61,69 @@
 #define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)
 #define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)
 #define CMD_Q_DEPTH(__qs)		(((__qs) >> 12) & 0x0000000f)
 #define CMD_Q_DEPTH(__qs)		(((__qs) >> 12) & 0x0000000f)
 
 
-/****** REQ0 Related Values ******/
+/* ------------------------ CCP Version 5 Specifics ------------------------ */
+#define CMD5_QUEUE_MASK_OFFSET		0x00
+#define	CMD5_QUEUE_PRIO_OFFSET		0x04
+#define CMD5_REQID_CONFIG_OFFSET	0x08
+#define	CMD5_CMD_TIMEOUT_OFFSET		0x10
+#define LSB_PUBLIC_MASK_LO_OFFSET	0x18
+#define LSB_PUBLIC_MASK_HI_OFFSET	0x1C
+#define LSB_PRIVATE_MASK_LO_OFFSET	0x20
+#define LSB_PRIVATE_MASK_HI_OFFSET	0x24
+
+#define CMD5_Q_CONTROL_BASE		0x0000
+#define CMD5_Q_TAIL_LO_BASE		0x0004
+#define CMD5_Q_HEAD_LO_BASE		0x0008
+#define CMD5_Q_INT_ENABLE_BASE		0x000C
+#define CMD5_Q_INTERRUPT_STATUS_BASE	0x0010
+
+#define CMD5_Q_STATUS_BASE		0x0100
+#define CMD5_Q_INT_STATUS_BASE		0x0104
+#define CMD5_Q_DMA_STATUS_BASE		0x0108
+#define CMD5_Q_DMA_READ_STATUS_BASE	0x010C
+#define CMD5_Q_DMA_WRITE_STATUS_BASE	0x0110
+#define CMD5_Q_ABORT_BASE		0x0114
+#define CMD5_Q_AX_CACHE_BASE		0x0118
+
+#define	CMD5_CONFIG_0_OFFSET		0x6000
+#define	CMD5_TRNG_CTL_OFFSET		0x6008
+#define	CMD5_AES_MASK_OFFSET		0x6010
+#define	CMD5_CLK_GATE_CTL_OFFSET	0x603C
+
+/* Address offset between two virtual queue registers */
+#define CMD5_Q_STATUS_INCR		0x1000
+
+/* Bit masks */
+#define CMD5_Q_RUN			0x1
+#define CMD5_Q_HALT			0x2
+#define CMD5_Q_MEM_LOCATION		0x4
+#define CMD5_Q_SIZE			0x1F
+#define CMD5_Q_SHIFT			3
+#define COMMANDS_PER_QUEUE		16
+#define QUEUE_SIZE_VAL			((ffs(COMMANDS_PER_QUEUE) - 2) & \
+					  CMD5_Q_SIZE)
+#define Q_PTR_MASK			((2 << (QUEUE_SIZE_VAL + 5)) - 1)
+#define Q_DESC_SIZE			sizeof(struct ccp5_desc)
+#define Q_SIZE(n)			(COMMANDS_PER_QUEUE*(n))
+
+#define INT_COMPLETION			0x1
+#define INT_ERROR			0x2
+#define INT_QUEUE_STOPPED		0x4
+#define ALL_INTERRUPTS			(INT_COMPLETION| \
+					 INT_ERROR| \
+					 INT_QUEUE_STOPPED)
+
+#define LSB_REGION_WIDTH		5
+#define MAX_LSB_CNT			8
+
+#define LSB_SIZE			16
+#define LSB_ITEM_SIZE			32
+#define PLSB_MAP_SIZE			(LSB_SIZE)
+#define SLSB_MAP_SIZE			(MAX_LSB_CNT * LSB_SIZE)
+
+#define LSB_ENTRY_NUMBER(LSB_ADDR)	(LSB_ADDR / LSB_ITEM_SIZE)
+
+/* ------------------------ CCP Version 3 Specifics ------------------------ */
 #define REQ0_WAIT_FOR_WRITE		0x00000004
 #define REQ0_WAIT_FOR_WRITE		0x00000004
 #define REQ0_INT_ON_COMPLETE		0x00000002
 #define REQ0_INT_ON_COMPLETE		0x00000002
 #define REQ0_STOP_ON_COMPLETE		0x00000001
 #define REQ0_STOP_ON_COMPLETE		0x00000001
@@ -110,29 +173,30 @@
 #define KSB_START			77
 #define KSB_START			77
 #define KSB_END				127
 #define KSB_END				127
 #define KSB_COUNT			(KSB_END - KSB_START + 1)
 #define KSB_COUNT			(KSB_END - KSB_START + 1)
-#define CCP_KSB_BITS			256
-#define CCP_KSB_BYTES			32
+#define CCP_SB_BITS			256
 
 
 #define CCP_JOBID_MASK			0x0000003f
 #define CCP_JOBID_MASK			0x0000003f
 
 
+/* ------------------------ General CCP Defines ------------------------ */
+
 #define CCP_DMAPOOL_MAX_SIZE		64
 #define CCP_DMAPOOL_MAX_SIZE		64
 #define CCP_DMAPOOL_ALIGN		BIT(5)
 #define CCP_DMAPOOL_ALIGN		BIT(5)
 
 
 #define CCP_REVERSE_BUF_SIZE		64
 #define CCP_REVERSE_BUF_SIZE		64
 
 
-#define CCP_AES_KEY_KSB_COUNT		1
-#define CCP_AES_CTX_KSB_COUNT		1
+#define CCP_AES_KEY_SB_COUNT		1
+#define CCP_AES_CTX_SB_COUNT		1
 
 
-#define CCP_XTS_AES_KEY_KSB_COUNT	1
-#define CCP_XTS_AES_CTX_KSB_COUNT	1
+#define CCP_XTS_AES_KEY_SB_COUNT	1
+#define CCP_XTS_AES_CTX_SB_COUNT	1
 
 
-#define CCP_SHA_KSB_COUNT		1
+#define CCP_SHA_SB_COUNT		1
 
 
 #define CCP_RSA_MAX_WIDTH		4096
 #define CCP_RSA_MAX_WIDTH		4096
 
 
 #define CCP_PASSTHRU_BLOCKSIZE		256
 #define CCP_PASSTHRU_BLOCKSIZE		256
 #define CCP_PASSTHRU_MASKSIZE		32
 #define CCP_PASSTHRU_MASKSIZE		32
-#define CCP_PASSTHRU_KSB_COUNT		1
+#define CCP_PASSTHRU_SB_COUNT		1
 
 
 #define CCP_ECC_MODULUS_BYTES		48      /* 384-bits */
 #define CCP_ECC_MODULUS_BYTES		48      /* 384-bits */
 #define CCP_ECC_MAX_OPERANDS		6
 #define CCP_ECC_MAX_OPERANDS		6
@@ -144,31 +208,12 @@
 #define CCP_ECC_RESULT_OFFSET		60
 #define CCP_ECC_RESULT_OFFSET		60
 #define CCP_ECC_RESULT_SUCCESS		0x0001
 #define CCP_ECC_RESULT_SUCCESS		0x0001
 
 
-struct ccp_op;
-
-/* Structure for computation functions that are device-specific */
-struct ccp_actions {
-	int (*perform_aes)(struct ccp_op *);
-	int (*perform_xts_aes)(struct ccp_op *);
-	int (*perform_sha)(struct ccp_op *);
-	int (*perform_rsa)(struct ccp_op *);
-	int (*perform_passthru)(struct ccp_op *);
-	int (*perform_ecc)(struct ccp_op *);
-	int (*init)(struct ccp_device *);
-	void (*destroy)(struct ccp_device *);
-	irqreturn_t (*irqhandler)(int, void *);
-};
-
-/* Structure to hold CCP version-specific values */
-struct ccp_vdata {
-	unsigned int version;
-	const struct ccp_actions *perform;
-};
-
-extern struct ccp_vdata ccpv3;
+#define CCP_SB_BYTES			32
 
 
+struct ccp_op;
 struct ccp_device;
 struct ccp_device;
 struct ccp_cmd;
 struct ccp_cmd;
+struct ccp_fns;
 
 
 struct ccp_dma_cmd {
 struct ccp_dma_cmd {
 	struct list_head entry;
 	struct list_head entry;
@@ -212,9 +257,29 @@ struct ccp_cmd_queue {
 	/* Queue dma pool */
 	/* Queue dma pool */
 	struct dma_pool *dma_pool;
 	struct dma_pool *dma_pool;
 
 
-	/* Queue reserved KSB regions */
-	u32 ksb_key;
-	u32 ksb_ctx;
+	/* Queue base address (not necessarily aligned) */
+	struct ccp5_desc *qbase;
+
+	/* Aligned queue start address (per requirement) */
+	struct mutex q_mutex ____cacheline_aligned;
+	unsigned int qidx;
+
+	/* Version 5 has different requirements for queue memory */
+	unsigned int qsize;
+	dma_addr_t qbase_dma;
+	dma_addr_t qdma_tail;
+
+	/* Per-queue reserved storage block(s) */
+	u32 sb_key;
+	u32 sb_ctx;
+
+	/* Bitmap of LSBs that can be accessed by this queue */
+	DECLARE_BITMAP(lsbmask, MAX_LSB_CNT);
+	/* Private LSB that is assigned to this queue, or -1 if none.
+	 * Bitmap for my private LSB, unused otherwise
+	 */
+	unsigned int lsb;
+	DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE);
 
 
 	/* Queue processing thread */
 	/* Queue processing thread */
 	struct task_struct *kthread;
 	struct task_struct *kthread;
@@ -229,8 +294,17 @@ struct ccp_cmd_queue {
 	u32 int_err;
 	u32 int_err;
 
 
 	/* Register addresses for queue */
 	/* Register addresses for queue */
+	void __iomem *reg_control;
+	void __iomem *reg_tail_lo;
+	void __iomem *reg_head_lo;
+	void __iomem *reg_int_enable;
+	void __iomem *reg_interrupt_status;
 	void __iomem *reg_status;
 	void __iomem *reg_status;
 	void __iomem *reg_int_status;
 	void __iomem *reg_int_status;
+	void __iomem *reg_dma_status;
+	void __iomem *reg_dma_read_status;
+	void __iomem *reg_dma_write_status;
+	u32 qcontrol; /* Cached control register */
 
 
 	/* Status values from job */
 	/* Status values from job */
 	u32 int_status;
 	u32 int_status;
@@ -253,16 +327,14 @@ struct ccp_device {
 
 
 	struct device *dev;
 	struct device *dev;
 
 
-	/*
-	 * Bus specific device information
+	/* Bus specific device information
 	 */
 	 */
 	void *dev_specific;
 	void *dev_specific;
 	int (*get_irq)(struct ccp_device *ccp);
 	int (*get_irq)(struct ccp_device *ccp);
 	void (*free_irq)(struct ccp_device *ccp);
 	void (*free_irq)(struct ccp_device *ccp);
 	unsigned int irq;
 	unsigned int irq;
 
 
-	/*
-	 * I/O area used for device communication. The register mapping
+	/* I/O area used for device communication. The register mapping
 	 * starts at an offset into the mapped bar.
 	 * starts at an offset into the mapped bar.
 	 *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register
 	 *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register
 	 *   need to be protected while a command queue thread is accessing
 	 *   need to be protected while a command queue thread is accessing
@@ -272,8 +344,7 @@ struct ccp_device {
 	void __iomem *io_map;
 	void __iomem *io_map;
 	void __iomem *io_regs;
 	void __iomem *io_regs;
 
 
-	/*
-	 * Master lists that all cmds are queued on. Because there can be
+	/* Master lists that all cmds are queued on. Because there can be
 	 * more than one CCP command queue that can process a cmd a separate
 	 * more than one CCP command queue that can process a cmd a separate
 	 * backlog list is needed so that the backlog completion call
 	 * backlog list is needed so that the backlog completion call
 	 * completes before the cmd is available for execution.
 	 * completes before the cmd is available for execution.
@@ -283,47 +354,54 @@ struct ccp_device {
 	struct list_head cmd;
 	struct list_head cmd;
 	struct list_head backlog;
 	struct list_head backlog;
 
 
-	/*
-	 * The command queues. These represent the queues available on the
+	/* The command queues. These represent the queues available on the
 	 * CCP that are available for processing cmds
 	 * CCP that are available for processing cmds
 	 */
 	 */
 	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
 	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
 	unsigned int cmd_q_count;
 	unsigned int cmd_q_count;
 
 
-	/*
-	 * Support for the CCP True RNG
+	/* Support for the CCP True RNG
 	 */
 	 */
 	struct hwrng hwrng;
 	struct hwrng hwrng;
 	unsigned int hwrng_retries;
 	unsigned int hwrng_retries;
 
 
-	/*
-	 * Support for the CCP DMA capabilities
+	/* Support for the CCP DMA capabilities
 	 */
 	 */
 	struct dma_device dma_dev;
 	struct dma_device dma_dev;
 	struct ccp_dma_chan *ccp_dma_chan;
 	struct ccp_dma_chan *ccp_dma_chan;
 	struct kmem_cache *dma_cmd_cache;
 	struct kmem_cache *dma_cmd_cache;
 	struct kmem_cache *dma_desc_cache;
 	struct kmem_cache *dma_desc_cache;
 
 
-	/*
-	 * A counter used to generate job-ids for cmds submitted to the CCP
+	/* A counter used to generate job-ids for cmds submitted to the CCP
 	 */
 	 */
 	atomic_t current_id ____cacheline_aligned;
 	atomic_t current_id ____cacheline_aligned;
 
 
-	/*
-	 * The CCP uses key storage blocks (KSB) to maintain context for certain
-	 * operations. To prevent multiple cmds from using the same KSB range
-	 * a command queue reserves a KSB range for the duration of the cmd.
-	 * Each queue, will however, reserve 2 KSB blocks for operations that
-	 * only require single KSB entries (eg. AES context/iv and key) in order
-	 * to avoid allocation contention.  This will reserve at most 10 KSB
-	 * entries, leaving 40 KSB entries available for dynamic allocation.
+	/* The v3 CCP uses key storage blocks (SB) to maintain context for
+	 * certain operations. To prevent multiple cmds from using the same
+	 * SB range a command queue reserves an SB range for the duration of
+	 * the cmd. Each queue will, however, reserve 2 SB blocks for
+	 * operations that only require single SB entries (eg. AES context/iv
+	 * and key) in order to avoid allocation contention.  This will reserve
+	 * at most 10 SB entries, leaving 40 SB entries available for dynamic
+	 * allocation.
+	 *
+	 * The v5 CCP Local Storage Block (LSB) is broken up into 8
+	 * memory ranges, each of which can be enabled for access by one
+	 * or more queues. Device initialization takes this into account,
+	 * and attempts to assign one region for exclusive use by each
+	 * available queue; the rest are then aggregated as "public" use.
+	 * If there are fewer regions than queues, all regions are shared
+	 * amongst all queues.
 	 */
 	 */
-	struct mutex ksb_mutex ____cacheline_aligned;
-	DECLARE_BITMAP(ksb, KSB_COUNT);
-	wait_queue_head_t ksb_queue;
-	unsigned int ksb_avail;
-	unsigned int ksb_count;
-	u32 ksb_start;
+	struct mutex sb_mutex ____cacheline_aligned;
+	DECLARE_BITMAP(sb, KSB_COUNT);
+	wait_queue_head_t sb_queue;
+	unsigned int sb_avail;
+	unsigned int sb_count;
+	u32 sb_start;
+
+	/* Bitmap of shared LSBs, if any */
+	DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE);
 
 
 	/* Suspend support */
 	/* Suspend support */
 	unsigned int suspending;
 	unsigned int suspending;
@@ -335,10 +413,11 @@ struct ccp_device {
 
 
 enum ccp_memtype {
 enum ccp_memtype {
 	CCP_MEMTYPE_SYSTEM = 0,
 	CCP_MEMTYPE_SYSTEM = 0,
-	CCP_MEMTYPE_KSB,
+	CCP_MEMTYPE_SB,
 	CCP_MEMTYPE_LOCAL,
 	CCP_MEMTYPE_LOCAL,
 	CCP_MEMTYPE__LAST,
 	CCP_MEMTYPE__LAST,
 };
 };
+#define	CCP_MEMTYPE_LSB	CCP_MEMTYPE_SB
 
 
 struct ccp_dma_info {
 struct ccp_dma_info {
 	dma_addr_t address;
 	dma_addr_t address;
@@ -379,7 +458,7 @@ struct ccp_mem {
 	enum ccp_memtype type;
 	enum ccp_memtype type;
 	union {
 	union {
 		struct ccp_dma_info dma;
 		struct ccp_dma_info dma;
-		u32 ksb;
+		u32 sb;
 	} u;
 	} u;
 };
 };
 
 
@@ -419,13 +498,14 @@ struct ccp_op {
 	u32 jobid;
 	u32 jobid;
 	u32 ioc;
 	u32 ioc;
 	u32 soc;
 	u32 soc;
-	u32 ksb_key;
-	u32 ksb_ctx;
+	u32 sb_key;
+	u32 sb_ctx;
 	u32 init;
 	u32 init;
 	u32 eom;
 	u32 eom;
 
 
 	struct ccp_mem src;
 	struct ccp_mem src;
 	struct ccp_mem dst;
 	struct ccp_mem dst;
+	struct ccp_mem exp;
 
 
 	union {
 	union {
 		struct ccp_aes_op aes;
 		struct ccp_aes_op aes;
@@ -435,6 +515,7 @@ struct ccp_op {
 		struct ccp_passthru_op passthru;
 		struct ccp_passthru_op passthru;
 		struct ccp_ecc_op ecc;
 		struct ccp_ecc_op ecc;
 	} u;
 	} u;
+	struct ccp_mem key;
 };
 };
 
 
 static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
 static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
@@ -447,6 +528,70 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info)
 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
 }
 }
 
 
+/**
+ * descriptor for version 5 CCP commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: low 32 bits of key pointer
+ * word 7: upper 16 bits of key pointer; key memory type
+ */
+struct dword0 {
+	__le32 soc:1;
+	__le32 ioc:1;
+	__le32 rsvd1:1;
+	__le32 init:1;
+	__le32 eom:1;		/* AES/SHA only */
+	__le32 function:15;
+	__le32 engine:4;
+	__le32 prot:1;
+	__le32 rsvd2:7;
+};
+
+struct dword3 {
+	__le32 src_hi:16;
+	__le32 src_mem:2;
+	__le32 lsb_cxt_id:8;
+	__le32 rsvd1:5;
+	__le32 fixed:1;
+};
+
+union dword4 {
+	__le32 dst_lo;		/* NON-SHA	*/
+	__le32 sha_len_lo;	/* SHA		*/
+};
+
+union dword5 {
+	struct {
+		__le32 dst_hi:16;
+		__le32 dst_mem:2;
+		__le32 rsvd1:13;
+		__le32 fixed:1;
+	} fields;
+	__le32 sha_len_hi;
+};
+
+struct dword7 {
+	__le32 key_hi:16;
+	__le32 key_mem:2;
+	__le32 rsvd1:14;
+};
+
+struct ccp5_desc {
+	struct dword0 dw0;
+	__le32 length;
+	__le32 src_lo;
+	struct dword3 dw3;
+	union dword4 dw4;
+	union dword5 dw5;
+	__le32 key_lo;
+	struct dword7 dw7;
+};
+
 int ccp_pci_init(void);
 int ccp_pci_init(void);
 void ccp_pci_exit(void);
 void ccp_pci_exit(void);
 
 
@@ -456,13 +601,48 @@ void ccp_platform_exit(void);
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_add_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 void ccp_del_device(struct ccp_device *ccp);
 
 
+extern void ccp_log_error(struct ccp_device *, int);
+
 struct ccp_device *ccp_alloc_struct(struct device *dev);
 struct ccp_device *ccp_alloc_struct(struct device *dev);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 bool ccp_queues_suspended(struct ccp_device *ccp);
 int ccp_cmd_queue_thread(void *data);
 int ccp_cmd_queue_thread(void *data);
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
 
 
 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
 
 
+int ccp_register_rng(struct ccp_device *ccp);
+void ccp_unregister_rng(struct ccp_device *ccp);
 int ccp_dmaengine_register(struct ccp_device *ccp);
 int ccp_dmaengine_register(struct ccp_device *ccp);
 void ccp_dmaengine_unregister(struct ccp_device *ccp);
 void ccp_dmaengine_unregister(struct ccp_device *ccp);
 
 
+/* Structure for computation functions that are device-specific */
+struct ccp_actions {
+	int (*aes)(struct ccp_op *);
+	int (*xts_aes)(struct ccp_op *);
+	int (*sha)(struct ccp_op *);
+	int (*rsa)(struct ccp_op *);
+	int (*passthru)(struct ccp_op *);
+	int (*ecc)(struct ccp_op *);
+	u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int);
+	void (*sbfree)(struct ccp_cmd_queue *, unsigned int,
+			       unsigned int);
+	unsigned int (*get_free_slots)(struct ccp_cmd_queue *);
+	int (*init)(struct ccp_device *);
+	void (*destroy)(struct ccp_device *);
+	irqreturn_t (*irqhandler)(int, void *);
+};
+
+/* Structure to hold CCP version-specific values */
+struct ccp_vdata {
+	const unsigned int version;
+	void (*setup)(struct ccp_device *);
+	const struct ccp_actions *perform;
+	const unsigned int bar;
+	const unsigned int offset;
+};
+
+extern const struct ccp_vdata ccpv3;
+extern const struct ccp_vdata ccpv5a;
+extern const struct ccp_vdata ccpv5b;
+
 #endif
 #endif

+ 6 - 5
drivers/crypto/ccp/ccp-dmaengine.c

@@ -299,12 +299,10 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan,
 {
 {
 	struct ccp_dma_desc *desc;
 	struct ccp_dma_desc *desc;
 
 
-	desc = kmem_cache_alloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
+	desc = kmem_cache_zalloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
 	if (!desc)
 	if (!desc)
 		return NULL;
 		return NULL;
 
 
-	memset(desc, 0, sizeof(*desc));
-
 	dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan);
 	dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan);
 	desc->tx_desc.flags = flags;
 	desc->tx_desc.flags = flags;
 	desc->tx_desc.tx_submit = ccp_tx_submit;
 	desc->tx_desc.tx_submit = ccp_tx_submit;
@@ -650,8 +648,11 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
 	dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL,
 	dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL,
 					     "%s-dmaengine-desc-cache",
 					     "%s-dmaengine-desc-cache",
 					     ccp->name);
 					     ccp->name);
-	if (!dma_cmd_cache_name)
-		return -ENOMEM;
+	if (!dma_desc_cache_name) {
+		ret = -ENOMEM;
+		goto err_cache;
+	}
+
 	ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name,
 	ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name,
 						sizeof(struct ccp_dma_desc),
 						sizeof(struct ccp_dma_desc),
 						sizeof(void *),
 						sizeof(void *),

+ 305 - 271
drivers/crypto/ccp/ccp-ops.c

@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -20,72 +21,28 @@
 #include "ccp-dev.h"
 #include "ccp-dev.h"
 
 
 /* SHA initial context values */
 /* SHA initial context values */
-static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
-	cpu_to_be32(SHA1_H4), 0, 0, 0,
+	cpu_to_be32(SHA1_H4),
 };
 };
 
 
-static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
 };
 };
 
 
-static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
+static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
 };
 };
 
 
-static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
-{
-	int start;
-
-	for (;;) {
-		mutex_lock(&ccp->ksb_mutex);
-
-		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
-							ccp->ksb_count,
-							ccp->ksb_start,
-							count, 0);
-		if (start <= ccp->ksb_count) {
-			bitmap_set(ccp->ksb, start, count);
-
-			mutex_unlock(&ccp->ksb_mutex);
-			break;
-		}
-
-		ccp->ksb_avail = 0;
-
-		mutex_unlock(&ccp->ksb_mutex);
-
-		/* Wait for KSB entries to become available */
-		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
-			return 0;
-	}
-
-	return KSB_START + start;
-}
-
-static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
-			 unsigned int count)
-{
-	if (!start)
-		return;
-
-	mutex_lock(&ccp->ksb_mutex);
-
-	bitmap_clear(ccp->ksb, start - KSB_START, count);
-
-	ccp->ksb_avail = 1;
-
-	mutex_unlock(&ccp->ksb_mutex);
-
-	wake_up_interruptible_all(&ccp->ksb_queue);
-}
+#define	CCP_NEW_JOBID(ccp)	((ccp->vdata->version == CCP_VERSION(3, 0)) ? \
+					ccp_gen_jobid(ccp) : 0)
 
 
 static u32 ccp_gen_jobid(struct ccp_device *ccp)
 static u32 ccp_gen_jobid(struct ccp_device *ccp)
 {
 {
@@ -231,7 +188,7 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
 				   unsigned int len, unsigned int se_len,
 				   unsigned int len, unsigned int se_len,
 				   bool sign_extend)
 				   bool sign_extend)
 {
 {
-	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+	unsigned int nbytes, sg_offset, dm_offset, sb_len, i;
 	u8 buffer[CCP_REVERSE_BUF_SIZE];
 	u8 buffer[CCP_REVERSE_BUF_SIZE];
 
 
 	if (WARN_ON(se_len > sizeof(buffer)))
 	if (WARN_ON(se_len > sizeof(buffer)))
@@ -241,21 +198,21 @@ static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
 	dm_offset = 0;
 	dm_offset = 0;
 	nbytes = len;
 	nbytes = len;
 	while (nbytes) {
 	while (nbytes) {
-		ksb_len = min_t(unsigned int, nbytes, se_len);
-		sg_offset -= ksb_len;
+		sb_len = min_t(unsigned int, nbytes, se_len);
+		sg_offset -= sb_len;
 
 
-		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
-		for (i = 0; i < ksb_len; i++)
-			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
+		scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 0);
+		for (i = 0; i < sb_len; i++)
+			wa->address[dm_offset + i] = buffer[sb_len - i - 1];
 
 
-		dm_offset += ksb_len;
-		nbytes -= ksb_len;
+		dm_offset += sb_len;
+		nbytes -= sb_len;
 
 
-		if ((ksb_len != se_len) && sign_extend) {
+		if ((sb_len != se_len) && sign_extend) {
 			/* Must sign-extend to nearest sign-extend length */
 			/* Must sign-extend to nearest sign-extend length */
 			if (wa->address[dm_offset - 1] & 0x80)
 			if (wa->address[dm_offset - 1] & 0x80)
 				memset(wa->address + dm_offset, 0xff,
 				memset(wa->address + dm_offset, 0xff,
-				       se_len - ksb_len);
+				       se_len - sb_len);
 		}
 		}
 	}
 	}
 
 
@@ -266,22 +223,22 @@ static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
 				    struct scatterlist *sg,
 				    struct scatterlist *sg,
 				    unsigned int len)
 				    unsigned int len)
 {
 {
-	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
+	unsigned int nbytes, sg_offset, dm_offset, sb_len, i;
 	u8 buffer[CCP_REVERSE_BUF_SIZE];
 	u8 buffer[CCP_REVERSE_BUF_SIZE];
 
 
 	sg_offset = 0;
 	sg_offset = 0;
 	dm_offset = len;
 	dm_offset = len;
 	nbytes = len;
 	nbytes = len;
 	while (nbytes) {
 	while (nbytes) {
-		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
-		dm_offset -= ksb_len;
+		sb_len = min_t(unsigned int, nbytes, sizeof(buffer));
+		dm_offset -= sb_len;
 
 
-		for (i = 0; i < ksb_len; i++)
-			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
-		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
+		for (i = 0; i < sb_len; i++)
+			buffer[sb_len - i - 1] = wa->address[dm_offset + i];
+		scatterwalk_map_and_copy(buffer, sg, sg_offset, sb_len, 1);
 
 
-		sg_offset += ksb_len;
-		nbytes -= ksb_len;
+		sg_offset += sb_len;
+		nbytes -= sb_len;
 	}
 	}
 }
 }
 
 
@@ -449,9 +406,9 @@ static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
 	}
 	}
 }
 }
 
 
-static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
-				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-				u32 byte_swap, bool from)
+static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
+			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+			       u32 byte_swap, bool from)
 {
 {
 	struct ccp_op op;
 	struct ccp_op op;
 
 
@@ -463,8 +420,8 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
 
 
 	if (from) {
 	if (from) {
 		op.soc = 1;
 		op.soc = 1;
-		op.src.type = CCP_MEMTYPE_KSB;
-		op.src.u.ksb = ksb;
+		op.src.type = CCP_MEMTYPE_SB;
+		op.src.u.sb = sb;
 		op.dst.type = CCP_MEMTYPE_SYSTEM;
 		op.dst.type = CCP_MEMTYPE_SYSTEM;
 		op.dst.u.dma.address = wa->dma.address;
 		op.dst.u.dma.address = wa->dma.address;
 		op.dst.u.dma.length = wa->length;
 		op.dst.u.dma.length = wa->length;
@@ -472,27 +429,27 @@ static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
 		op.src.type = CCP_MEMTYPE_SYSTEM;
 		op.src.type = CCP_MEMTYPE_SYSTEM;
 		op.src.u.dma.address = wa->dma.address;
 		op.src.u.dma.address = wa->dma.address;
 		op.src.u.dma.length = wa->length;
 		op.src.u.dma.length = wa->length;
-		op.dst.type = CCP_MEMTYPE_KSB;
-		op.dst.u.ksb = ksb;
+		op.dst.type = CCP_MEMTYPE_SB;
+		op.dst.u.sb = sb;
 	}
 	}
 
 
 	op.u.passthru.byte_swap = byte_swap;
 	op.u.passthru.byte_swap = byte_swap;
 
 
-	return cmd_q->ccp->vdata->perform->perform_passthru(&op);
+	return cmd_q->ccp->vdata->perform->passthru(&op);
 }
 }
 
 
-static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
-			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-			   u32 byte_swap)
+static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
+			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+			  u32 byte_swap)
 {
 {
-	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
+	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
 }
 }
 
 
-static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
-			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
-			     u32 byte_swap)
+static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
+			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+			    u32 byte_swap)
 {
 {
-	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
+	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
 }
 }
 
 
 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
@@ -527,54 +484,54 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 			return -EINVAL;
 			return -EINVAL;
 	}
 	}
 
 
-	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
-	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
 
 
 	ret = -EIO;
 	ret = -EIO;
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.ksb_key = cmd_q->ksb_key;
-	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+	op.sb_ctx = cmd_q->sb_ctx;
 	op.init = 1;
 	op.init = 1;
 	op.u.aes.type = aes->type;
 	op.u.aes.type = aes->type;
 	op.u.aes.mode = aes->mode;
 	op.u.aes.mode = aes->mode;
 	op.u.aes.action = aes->action;
 	op.u.aes.action = aes->action;
 
 
-	/* All supported key sizes fit in a single (32-byte) KSB entry
+	/* All supported key sizes fit in a single (32-byte) SB entry
 	 * and must be in little endian format. Use the 256-bit byte
 	 * and must be in little endian format. Use the 256-bit byte
 	 * swap passthru option to convert from big endian to little
 	 * swap passthru option to convert from big endian to little
 	 * endian.
 	 * endian.
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&key, cmd_q,
 	ret = ccp_init_dm_workarea(&key, cmd_q,
-				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
 				   DMA_TO_DEVICE);
 				   DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
-	dm_offset = CCP_KSB_BYTES - aes->key_len;
+	dm_offset = CCP_SB_BYTES - aes->key_len;
 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
-	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_key;
 		goto e_key;
 	}
 	}
 
 
-	/* The AES context fits in a single (32-byte) KSB entry and
+	/* The AES context fits in a single (32-byte) SB entry and
 	 * must be in little endian format. Use the 256-bit byte swap
 	 * must be in little endian format. Use the 256-bit byte swap
 	 * passthru option to convert from big endian to little endian.
 	 * passthru option to convert from big endian to little endian.
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
-				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
 				   DMA_BIDIRECTIONAL);
 				   DMA_BIDIRECTIONAL);
 	if (ret)
 	if (ret)
 		goto e_key;
 		goto e_key;
 
 
-	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
-	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_ctx;
 		goto e_ctx;
@@ -592,9 +549,9 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 			op.eom = 1;
 			op.eom = 1;
 
 
 			/* Push the K1/K2 key to the CCP now */
 			/* Push the K1/K2 key to the CCP now */
-			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
-						op.ksb_ctx,
-						CCP_PASSTHRU_BYTESWAP_256BIT);
+			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
+					       op.sb_ctx,
+					       CCP_PASSTHRU_BYTESWAP_256BIT);
 			if (ret) {
 			if (ret) {
 				cmd->engine_error = cmd_q->cmd_error;
 				cmd->engine_error = cmd_q->cmd_error;
 				goto e_src;
 				goto e_src;
@@ -602,15 +559,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 
 
 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
 					aes->cmac_key_len);
 					aes->cmac_key_len);
-			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-					      CCP_PASSTHRU_BYTESWAP_256BIT);
+			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+					     CCP_PASSTHRU_BYTESWAP_256BIT);
 			if (ret) {
 			if (ret) {
 				cmd->engine_error = cmd_q->cmd_error;
 				cmd->engine_error = cmd_q->cmd_error;
 				goto e_src;
 				goto e_src;
 			}
 			}
 		}
 		}
 
 
-		ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
+		ret = cmd_q->ccp->vdata->perform->aes(&op);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_src;
 			goto e_src;
@@ -622,15 +579,15 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
 	/* Retrieve the AES context - convert from LE to BE using
 	/* Retrieve the AES context - convert from LE to BE using
 	 * 32-byte (256-bit) byteswapping
 	 * 32-byte (256-bit) byteswapping
 	 */
 	 */
-	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-				CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_src;
 		goto e_src;
 	}
 	}
 
 
 	/* ...but we only need AES_BLOCK_SIZE bytes */
 	/* ...but we only need AES_BLOCK_SIZE bytes */
-	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 
 
 e_src:
 e_src:
@@ -680,56 +637,56 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 			return -EINVAL;
 			return -EINVAL;
 	}
 	}
 
 
-	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
-	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
 
 
 	ret = -EIO;
 	ret = -EIO;
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.ksb_key = cmd_q->ksb_key;
-	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+	op.sb_ctx = cmd_q->sb_ctx;
 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
 	op.u.aes.type = aes->type;
 	op.u.aes.type = aes->type;
 	op.u.aes.mode = aes->mode;
 	op.u.aes.mode = aes->mode;
 	op.u.aes.action = aes->action;
 	op.u.aes.action = aes->action;
 
 
-	/* All supported key sizes fit in a single (32-byte) KSB entry
+	/* All supported key sizes fit in a single (32-byte) SB entry
 	 * and must be in little endian format. Use the 256-bit byte
 	 * and must be in little endian format. Use the 256-bit byte
 	 * swap passthru option to convert from big endian to little
 	 * swap passthru option to convert from big endian to little
 	 * endian.
 	 * endian.
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&key, cmd_q,
 	ret = ccp_init_dm_workarea(&key, cmd_q,
-				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
 				   DMA_TO_DEVICE);
 				   DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
-	dm_offset = CCP_KSB_BYTES - aes->key_len;
+	dm_offset = CCP_SB_BYTES - aes->key_len;
 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
-	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_key;
 		goto e_key;
 	}
 	}
 
 
-	/* The AES context fits in a single (32-byte) KSB entry and
+	/* The AES context fits in a single (32-byte) SB entry and
 	 * must be in little endian format. Use the 256-bit byte swap
 	 * must be in little endian format. Use the 256-bit byte swap
 	 * passthru option to convert from big endian to little endian.
 	 * passthru option to convert from big endian to little endian.
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
-				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
 				   DMA_BIDIRECTIONAL);
 				   DMA_BIDIRECTIONAL);
 	if (ret)
 	if (ret)
 		goto e_key;
 		goto e_key;
 
 
 	if (aes->mode != CCP_AES_MODE_ECB) {
 	if (aes->mode != CCP_AES_MODE_ECB) {
-		/* Load the AES context - conver to LE */
-		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+		/* Load the AES context - convert to LE */
+		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
-		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-				      CCP_PASSTHRU_BYTESWAP_256BIT);
+		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+				     CCP_PASSTHRU_BYTESWAP_256BIT);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_ctx;
 			goto e_ctx;
@@ -772,7 +729,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 				op.soc = 1;
 				op.soc = 1;
 		}
 		}
 
 
-		ret = cmd_q->ccp->vdata->perform->perform_aes(&op);
+		ret = cmd_q->ccp->vdata->perform->aes(&op);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_dst;
 			goto e_dst;
@@ -785,15 +742,15 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		/* Retrieve the AES context - convert from LE to BE using
 		/* Retrieve the AES context - convert from LE to BE using
 		 * 32-byte (256-bit) byteswapping
 		 * 32-byte (256-bit) byteswapping
 		 */
 		 */
-		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-					CCP_PASSTHRU_BYTESWAP_256BIT);
+		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+				       CCP_PASSTHRU_BYTESWAP_256BIT);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_dst;
 			goto e_dst;
 		}
 		}
 
 
 		/* ...but we only need AES_BLOCK_SIZE bytes */
 		/* ...but we only need AES_BLOCK_SIZE bytes */
-		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
 	}
 	}
 
 
@@ -857,53 +814,53 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
-	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
+	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
 
 
 	ret = -EIO;
 	ret = -EIO;
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.ksb_key = cmd_q->ksb_key;
-	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+	op.sb_ctx = cmd_q->sb_ctx;
 	op.init = 1;
 	op.init = 1;
 	op.u.xts.action = xts->action;
 	op.u.xts.action = xts->action;
 	op.u.xts.unit_size = xts->unit_size;
 	op.u.xts.unit_size = xts->unit_size;
 
 
-	/* All supported key sizes fit in a single (32-byte) KSB entry
+	/* All supported key sizes fit in a single (32-byte) SB entry
 	 * and must be in little endian format. Use the 256-bit byte
 	 * and must be in little endian format. Use the 256-bit byte
 	 * swap passthru option to convert from big endian to little
 	 * swap passthru option to convert from big endian to little
 	 * endian.
 	 * endian.
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&key, cmd_q,
 	ret = ccp_init_dm_workarea(&key, cmd_q,
-				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
+				   CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
 				   DMA_TO_DEVICE);
 				   DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
 
 
-	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
+	dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
-	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
-			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_key;
 		goto e_key;
 	}
 	}
 
 
-	/* The AES context fits in a single (32-byte) KSB entry and
+	/* The AES context fits in a single (32-byte) SB entry and
 	 * for XTS is already in little endian format so no byte swapping
 	 * for XTS is already in little endian format so no byte swapping
 	 * is needed.
 	 * is needed.
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
-				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
+				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
 				   DMA_BIDIRECTIONAL);
 				   DMA_BIDIRECTIONAL);
 	if (ret)
 	if (ret)
 		goto e_key;
 		goto e_key;
 
 
 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
-	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-			      CCP_PASSTHRU_BYTESWAP_NOOP);
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_NOOP);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_ctx;
 		goto e_ctx;
@@ -937,7 +894,7 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 		if (!src.sg_wa.bytes_left)
 		if (!src.sg_wa.bytes_left)
 			op.eom = 1;
 			op.eom = 1;
 
 
-		ret = cmd_q->ccp->vdata->perform->perform_xts_aes(&op);
+		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_dst;
 			goto e_dst;
@@ -949,15 +906,15 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
 	/* Retrieve the AES context - convert from LE to BE using
 	/* Retrieve the AES context - convert from LE to BE using
 	 * 32-byte (256-bit) byteswapping
 	 * 32-byte (256-bit) byteswapping
 	 */
 	 */
-	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-				CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_dst;
 		goto e_dst;
 	}
 	}
 
 
 	/* ...but we only need AES_BLOCK_SIZE bytes */
 	/* ...but we only need AES_BLOCK_SIZE bytes */
-	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
+	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
 
 
 e_dst:
 e_dst:
@@ -982,163 +939,227 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	struct ccp_dm_workarea ctx;
 	struct ccp_dm_workarea ctx;
 	struct ccp_data src;
 	struct ccp_data src;
 	struct ccp_op op;
 	struct ccp_op op;
+	unsigned int ioffset, ooffset;
+	unsigned int digest_size;
+	int sb_count;
+	const void *init;
+	u64 block_size;
+	int ctx_size;
 	int ret;
 	int ret;
 
 
-	if (sha->ctx_len != CCP_SHA_CTXSIZE)
+	switch (sha->type) {
+	case CCP_SHA_TYPE_1:
+		if (sha->ctx_len < SHA1_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA1_BLOCK_SIZE;
+		break;
+	case CCP_SHA_TYPE_224:
+		if (sha->ctx_len < SHA224_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA224_BLOCK_SIZE;
+		break;
+	case CCP_SHA_TYPE_256:
+		if (sha->ctx_len < SHA256_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA256_BLOCK_SIZE;
+		break;
+	default:
 		return -EINVAL;
 		return -EINVAL;
+	}
 
 
 	if (!sha->ctx)
 	if (!sha->ctx)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
+	if (!sha->final && (sha->src_len & (block_size - 1)))
 		return -EINVAL;
 		return -EINVAL;
 
 
-	if (!sha->src_len) {
-		const u8 *sha_zero;
+	/* The version 3 device can't handle zero-length input */
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
 
 
-		/* Not final, just return */
-		if (!sha->final)
-			return 0;
+		if (!sha->src_len) {
+			unsigned int digest_len;
+			const u8 *sha_zero;
 
 
-		/* CCP can't do a zero length sha operation so the caller
-		 * must buffer the data.
-		 */
-		if (sha->msg_bits)
-			return -EINVAL;
+			/* Not final, just return */
+			if (!sha->final)
+				return 0;
 
 
-		/* The CCP cannot perform zero-length sha operations so the
-		 * caller is required to buffer data for the final operation.
-		 * However, a sha operation for a message with a total length
-		 * of zero is valid so known values are required to supply
-		 * the result.
-		 */
-		switch (sha->type) {
-		case CCP_SHA_TYPE_1:
-			sha_zero = sha1_zero_message_hash;
-			break;
-		case CCP_SHA_TYPE_224:
-			sha_zero = sha224_zero_message_hash;
-			break;
-		case CCP_SHA_TYPE_256:
-			sha_zero = sha256_zero_message_hash;
-			break;
-		default:
-			return -EINVAL;
-		}
+			/* CCP can't do a zero length sha operation so the
+			 * caller must buffer the data.
+			 */
+			if (sha->msg_bits)
+				return -EINVAL;
+
+			/* The CCP cannot perform zero-length sha operations
+			 * so the caller is required to buffer data for the
+			 * final operation. However, a sha operation for a
+			 * message with a total length of zero is valid so
+			 * known values are required to supply the result.
+			 */
+			switch (sha->type) {
+			case CCP_SHA_TYPE_1:
+				sha_zero = sha1_zero_message_hash;
+				digest_len = SHA1_DIGEST_SIZE;
+				break;
+			case CCP_SHA_TYPE_224:
+				sha_zero = sha224_zero_message_hash;
+				digest_len = SHA224_DIGEST_SIZE;
+				break;
+			case CCP_SHA_TYPE_256:
+				sha_zero = sha256_zero_message_hash;
+				digest_len = SHA256_DIGEST_SIZE;
+				break;
+			default:
+				return -EINVAL;
+			}
 
 
-		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
-					 sha->ctx_len, 1);
+			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
+						 digest_len, 1);
 
 
-		return 0;
+			return 0;
+		}
 	}
 	}
 
 
-	if (!sha->src)
-		return -EINVAL;
+	/* Set variables used throughout */
+	switch (sha->type) {
+	case CCP_SHA_TYPE_1:
+		digest_size = SHA1_DIGEST_SIZE;
+		init = (void *) ccp_sha1_init;
+		ctx_size = SHA1_DIGEST_SIZE;
+		sb_count = 1;
+		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
+		else
+			ooffset = ioffset = 0;
+		break;
+	case CCP_SHA_TYPE_224:
+		digest_size = SHA224_DIGEST_SIZE;
+		init = (void *) ccp_sha224_init;
+		ctx_size = SHA256_DIGEST_SIZE;
+		sb_count = 1;
+		ioffset = 0;
+		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
+		else
+			ooffset = 0;
+		break;
+	case CCP_SHA_TYPE_256:
+		digest_size = SHA256_DIGEST_SIZE;
+		init = (void *) ccp_sha256_init;
+		ctx_size = SHA256_DIGEST_SIZE;
+		sb_count = 1;
+		ooffset = ioffset = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		goto e_data;
+	}
 
 
-	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
+	/* For zero-length plaintext the src pointer is ignored;
+	 * otherwise both parts must be valid
+	 */
+	if (sha->src_len && !sha->src)
+		return -EINVAL;
 
 
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.ksb_ctx = cmd_q->ksb_ctx;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
 	op.u.sha.type = sha->type;
 	op.u.sha.type = sha->type;
 	op.u.sha.msg_bits = sha->msg_bits;
 	op.u.sha.msg_bits = sha->msg_bits;
 
 
-	/* The SHA context fits in a single (32-byte) KSB entry and
-	 * must be in little endian format. Use the 256-bit byte swap
-	 * passthru option to convert from big endian to little endian.
-	 */
-	ret = ccp_init_dm_workarea(&ctx, cmd_q,
-				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
+	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
 				   DMA_BIDIRECTIONAL);
 				   DMA_BIDIRECTIONAL);
 	if (ret)
 	if (ret)
 		return ret;
 		return ret;
-
 	if (sha->first) {
 	if (sha->first) {
-		const __be32 *init;
-
 		switch (sha->type) {
 		switch (sha->type) {
 		case CCP_SHA_TYPE_1:
 		case CCP_SHA_TYPE_1:
-			init = ccp_sha1_init;
-			break;
 		case CCP_SHA_TYPE_224:
 		case CCP_SHA_TYPE_224:
-			init = ccp_sha224_init;
-			break;
 		case CCP_SHA_TYPE_256:
 		case CCP_SHA_TYPE_256:
-			init = ccp_sha256_init;
+			memcpy(ctx.address + ioffset, init, ctx_size);
 			break;
 			break;
 		default:
 		default:
 			ret = -EINVAL;
 			ret = -EINVAL;
 			goto e_ctx;
 			goto e_ctx;
 		}
 		}
-		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
 	} else {
 	} else {
-		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+		/* Restore the context */
+		ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
+				sb_count * CCP_SB_BYTES);
 	}
 	}
 
 
-	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-			      CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_ctx;
 		goto e_ctx;
 	}
 	}
 
 
-	/* Send data to the CCP SHA engine */
-	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
-			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
-	if (ret)
-		goto e_ctx;
+	if (sha->src) {
+		/* Send data to the CCP SHA engine; block_size is set above */
+		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
+				    block_size, DMA_TO_DEVICE);
+		if (ret)
+			goto e_ctx;
 
 
-	while (src.sg_wa.bytes_left) {
-		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
-		if (sha->final && !src.sg_wa.bytes_left)
-			op.eom = 1;
+		while (src.sg_wa.bytes_left) {
+			ccp_prepare_data(&src, NULL, &op, block_size, false);
+			if (sha->final && !src.sg_wa.bytes_left)
+				op.eom = 1;
+
+			ret = cmd_q->ccp->vdata->perform->sha(&op);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_data;
+			}
 
 
-		ret = cmd_q->ccp->vdata->perform->perform_sha(&op);
+			ccp_process_data(&src, NULL, &op);
+		}
+	} else {
+		op.eom = 1;
+		ret = cmd_q->ccp->vdata->perform->sha(&op);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_data;
 			goto e_data;
 		}
 		}
-
-		ccp_process_data(&src, NULL, &op);
 	}
 	}
 
 
 	/* Retrieve the SHA context - convert from LE to BE using
 	/* Retrieve the SHA context - convert from LE to BE using
 	 * 32-byte (256-bit) byteswapping to BE
 	 * 32-byte (256-bit) byteswapping to BE
 	 */
 	 */
-	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
-				CCP_PASSTHRU_BYTESWAP_256BIT);
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_data;
 		goto e_data;
 	}
 	}
 
 
-	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
-
-	if (sha->final && sha->opad) {
-		/* HMAC operation, recursively perform final SHA */
-		struct ccp_cmd hmac_cmd;
-		struct scatterlist sg;
-		u64 block_size, digest_size;
-		u8 *hmac_buf;
-
+	if (sha->final) {
+		/* Finishing up, so get the digest */
 		switch (sha->type) {
 		switch (sha->type) {
 		case CCP_SHA_TYPE_1:
 		case CCP_SHA_TYPE_1:
-			block_size = SHA1_BLOCK_SIZE;
-			digest_size = SHA1_DIGEST_SIZE;
-			break;
 		case CCP_SHA_TYPE_224:
 		case CCP_SHA_TYPE_224:
-			block_size = SHA224_BLOCK_SIZE;
-			digest_size = SHA224_DIGEST_SIZE;
-			break;
 		case CCP_SHA_TYPE_256:
 		case CCP_SHA_TYPE_256:
-			block_size = SHA256_BLOCK_SIZE;
-			digest_size = SHA256_DIGEST_SIZE;
+			ccp_get_dm_area(&ctx, ooffset,
+					sha->ctx, 0,
+					digest_size);
 			break;
 			break;
 		default:
 		default:
 			ret = -EINVAL;
 			ret = -EINVAL;
-			goto e_data;
+			goto e_ctx;
 		}
 		}
+	} else {
+		/* Stash the context */
+		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
+				sb_count * CCP_SB_BYTES);
+	}
+
+	if (sha->final && sha->opad) {
+		/* HMAC operation, recursively perform final SHA */
+		struct ccp_cmd hmac_cmd;
+		struct scatterlist sg;
+		u8 *hmac_buf;
 
 
 		if (sha->opad_len != block_size) {
 		if (sha->opad_len != block_size) {
 			ret = -EINVAL;
 			ret = -EINVAL;
@@ -1153,7 +1174,18 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
 
 
 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
-		memcpy(hmac_buf + block_size, ctx.address, digest_size);
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+		case CCP_SHA_TYPE_224:
+		case CCP_SHA_TYPE_256:
+			memcpy(hmac_buf + block_size,
+			       ctx.address + ooffset,
+			       digest_size);
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_ctx;
+		}
 
 
 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
 		hmac_cmd.engine = CCP_ENGINE_SHA;
 		hmac_cmd.engine = CCP_ENGINE_SHA;
@@ -1176,7 +1208,8 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	}
 	}
 
 
 e_data:
 e_data:
-	ccp_free_data(&src, cmd_q);
+	if (sha->src)
+		ccp_free_data(&src, cmd_q);
 
 
 e_ctx:
 e_ctx:
 	ccp_dm_free(&ctx);
 	ccp_dm_free(&ctx);
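
The HMAC hunks above rebuild the finalization step: when sha->final and sha->opad are set, the driver concatenates the opad block with the inner digest pulled out of the context area (at ooffset) and runs one more SHA pass over the result. A stand-alone C sketch of that buffer layout, using SHA-256 sizes and placeholder names (hmac_outer_input is illustrative, not a driver function):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLOCK_SIZE  64	/* SHA-256 block size, per the sha->type switch above */
#define DIGEST_SIZE 32	/* SHA-256 digest size */

/* The outer-hash input is the opad block followed by the inner digest. */
static unsigned char *hmac_outer_input(const unsigned char *opad,
				       const unsigned char *inner_digest)
{
	unsigned char *buf = malloc(BLOCK_SIZE + DIGEST_SIZE);

	if (!buf)
		return NULL;
	memcpy(buf, opad, BLOCK_SIZE);				/* opad first */
	memcpy(buf + BLOCK_SIZE, inner_digest, DIGEST_SIZE);	/* then H(ipad || msg) */
	return buf;						/* caller hashes this buffer */
}

int main(void)
{
	unsigned char opad[BLOCK_SIZE] = { 0x5c };	/* dummy data for the sketch */
	unsigned char digest[DIGEST_SIZE] = { 0 };
	unsigned char *buf = hmac_outer_input(opad, digest);

	if (!buf)
		return 1;
	printf("outer hash covers %d bytes\n", BLOCK_SIZE + DIGEST_SIZE);
	free(buf);
	return 0;
}
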
@@ -1190,7 +1223,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	struct ccp_dm_workarea exp, src;
 	struct ccp_dm_workarea exp, src;
 	struct ccp_data dst;
 	struct ccp_data dst;
 	struct ccp_op op;
 	struct ccp_op op;
-	unsigned int ksb_count, i_len, o_len;
+	unsigned int sb_count, i_len, o_len;
 	int ret;
 	int ret;
 
 
 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
@@ -1208,16 +1241,17 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	o_len = ((rsa->key_size + 255) / 256) * 32;
 	o_len = ((rsa->key_size + 255) / 256) * 32;
 	i_len = o_len * 2;
 	i_len = o_len * 2;
 
 
-	ksb_count = o_len / CCP_KSB_BYTES;
+	sb_count = o_len / CCP_SB_BYTES;
 
 
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
-	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
-	if (!op.ksb_key)
+	op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
+
+	if (!op.sb_key)
 		return -EIO;
 		return -EIO;
 
 
-	/* The RSA exponent may span multiple (32-byte) KSB entries and must
+	/* The RSA exponent may span multiple (32-byte) SB entries and must
 	 * be in little endian format. Reverse copy each 32-byte chunk
 	 * be in little endian format. Reverse copy each 32-byte chunk
 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
 	 * and each byte within that chunk and do not perform any byte swap
 	 * and each byte within that chunk and do not perform any byte swap
@@ -1225,14 +1259,14 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	 */
 	 */
 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
 	if (ret)
 	if (ret)
-		goto e_ksb;
+		goto e_sb;
 
 
 	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
 	ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
-				      CCP_KSB_BYTES, false);
+				      CCP_SB_BYTES, false);
 	if (ret)
 	if (ret)
 		goto e_exp;
 		goto e_exp;
-	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
-			      CCP_PASSTHRU_BYTESWAP_NOOP);
+	ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_NOOP);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_exp;
 		goto e_exp;
@@ -1247,12 +1281,12 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_exp;
 		goto e_exp;
 
 
 	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
 	ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
-				      CCP_KSB_BYTES, false);
+				      CCP_SB_BYTES, false);
 	if (ret)
 	if (ret)
 		goto e_src;
 		goto e_src;
 	src.address += o_len;	/* Adjust the address for the copy operation */
 	src.address += o_len;	/* Adjust the address for the copy operation */
 	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
 	ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
-				      CCP_KSB_BYTES, false);
+				      CCP_SB_BYTES, false);
 	if (ret)
 	if (ret)
 		goto e_src;
 		goto e_src;
 	src.address -= o_len;	/* Reset the address to original value */
 	src.address -= o_len;	/* Reset the address to original value */
@@ -1274,7 +1308,7 @@ static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	op.u.rsa.mod_size = rsa->key_size;
 	op.u.rsa.mod_size = rsa->key_size;
 	op.u.rsa.input_len = i_len;
 	op.u.rsa.input_len = i_len;
 
 
-	ret = cmd_q->ccp->vdata->perform->perform_rsa(&op);
+	ret = cmd_q->ccp->vdata->perform->rsa(&op);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_dst;
 		goto e_dst;
@@ -1291,8 +1325,8 @@ e_src:
 e_exp:
 e_exp:
 	ccp_dm_free(&exp);
 	ccp_dm_free(&exp);
 
 
-e_ksb:
-	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
+e_sb:
+	cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
 
 
 	return ret;
 	return ret;
 }
 }
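
The sizing logic in ccp_run_rsa_cmd() above rounds the key size (given in bits) up to whole 256-bit storage-block entries and reserves twice that for the input, which carries both the modulus and the message. A small userspace sketch of the arithmetic, assuming CCP_SB_BYTES is 32 as the "32-byte SB entries" comments suggest:

#include <stdio.h>

#define SB_BYTES 32	/* stands in for CCP_SB_BYTES */

int main(void)
{
	unsigned int key_size = 2048;				/* bits */
	unsigned int o_len = ((key_size + 255) / 256) * 32;	/* bytes, rounded to 256-bit entries */
	unsigned int i_len = o_len * 2;				/* modulus + message */
	unsigned int sb_count = o_len / SB_BYTES;		/* storage-block entries needed */

	printf("o_len=%u i_len=%u sb_count=%u\n", o_len, i_len, sb_count);
	return 0;	/* 2048-bit key -> o_len=256, i_len=512, sb_count=8 */
}
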
@@ -1306,7 +1340,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
 	struct ccp_op op;
 	struct ccp_op op;
 	bool in_place = false;
 	bool in_place = false;
 	unsigned int i;
 	unsigned int i;
-	int ret;
+	int ret = 0;
 
 
 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
 		return -EINVAL;
 		return -EINVAL;
@@ -1321,26 +1355,26 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
 			return -EINVAL;
 			return -EINVAL;
 	}
 	}
 
 
-	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
 
 
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
 
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 		/* Load the mask */
 		/* Load the mask */
-		op.ksb_key = cmd_q->ksb_key;
+		op.sb_key = cmd_q->sb_key;
 
 
 		ret = ccp_init_dm_workarea(&mask, cmd_q,
 		ret = ccp_init_dm_workarea(&mask, cmd_q,
-					   CCP_PASSTHRU_KSB_COUNT *
-					   CCP_KSB_BYTES,
+					   CCP_PASSTHRU_SB_COUNT *
+					   CCP_SB_BYTES,
 					   DMA_TO_DEVICE);
 					   DMA_TO_DEVICE);
 		if (ret)
 		if (ret)
 			return ret;
 			return ret;
 
 
 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
-		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
-				      CCP_PASSTHRU_BYTESWAP_NOOP);
+		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_mask;
 			goto e_mask;
@@ -1399,7 +1433,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
 		op.dst.u.dma.length = op.src.u.dma.length;
 		op.dst.u.dma.length = op.src.u.dma.length;
 
 
-		ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
+		ret = cmd_q->ccp->vdata->perform->passthru(&op);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
 			goto e_dst;
 			goto e_dst;
@@ -1448,7 +1482,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 			return -EINVAL;
 			return -EINVAL;
 	}
 	}
 
 
-	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
+	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
 
 
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
@@ -1456,13 +1490,13 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 
 
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
 		/* Load the mask */
 		/* Load the mask */
-		op.ksb_key = cmd_q->ksb_key;
+		op.sb_key = cmd_q->sb_key;
 
 
 		mask.length = pt->mask_len;
 		mask.length = pt->mask_len;
 		mask.dma.address = pt->mask;
 		mask.dma.address = pt->mask;
 		mask.dma.length = pt->mask_len;
 		mask.dma.length = pt->mask_len;
 
 
-		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
+		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
 				     CCP_PASSTHRU_BYTESWAP_NOOP);
 				     CCP_PASSTHRU_BYTESWAP_NOOP);
 		if (ret) {
 		if (ret) {
 			cmd->engine_error = cmd_q->cmd_error;
 			cmd->engine_error = cmd_q->cmd_error;
@@ -1484,7 +1518,7 @@ static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 	op.dst.u.dma.offset = 0;
 	op.dst.u.dma.offset = 0;
 	op.dst.u.dma.length = pt->src_len;
 	op.dst.u.dma.length = pt->src_len;
 
 
-	ret = cmd_q->ccp->vdata->perform->perform_passthru(&op);
+	ret = cmd_q->ccp->vdata->perform->passthru(&op);
 	if (ret)
 	if (ret)
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 
 
@@ -1514,7 +1548,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
 
 	/* Concatenate the modulus and the operands. Both the modulus and
 	/* Concatenate the modulus and the operands. Both the modulus and
 	 * the operands must be in little endian format.  Since the input
 	 * the operands must be in little endian format.  Since the input
@@ -1575,7 +1609,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 
 	op.u.ecc.function = cmd->u.ecc.function;
 	op.u.ecc.function = cmd->u.ecc.function;
 
 
-	ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
+	ret = cmd_q->ccp->vdata->perform->ecc(&op);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_dst;
 		goto e_dst;
@@ -1639,7 +1673,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 
 	memset(&op, 0, sizeof(op));
 	memset(&op, 0, sizeof(op));
 	op.cmd_q = cmd_q;
 	op.cmd_q = cmd_q;
-	op.jobid = ccp_gen_jobid(cmd_q->ccp);
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
 
 
 	/* Concatenate the modulus and the operands. Both the modulus and
 	/* Concatenate the modulus and the operands. Both the modulus and
 	 * the operands must be in little endian format.  Since the input
 	 * the operands must be in little endian format.  Since the input
@@ -1677,7 +1711,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 		goto e_src;
 		goto e_src;
 	src.address += CCP_ECC_OPERAND_SIZE;
 	src.address += CCP_ECC_OPERAND_SIZE;
 
 
-	/* Set the first point Z coordianate to 1 */
+	/* Set the first point Z coordinate to 1 */
 	*src.address = 0x01;
 	*src.address = 0x01;
 	src.address += CCP_ECC_OPERAND_SIZE;
 	src.address += CCP_ECC_OPERAND_SIZE;
 
 
@@ -1696,7 +1730,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 			goto e_src;
 			goto e_src;
 		src.address += CCP_ECC_OPERAND_SIZE;
 		src.address += CCP_ECC_OPERAND_SIZE;
 
 
-		/* Set the second point Z coordianate to 1 */
+		/* Set the second point Z coordinate to 1 */
 		*src.address = 0x01;
 		*src.address = 0x01;
 		src.address += CCP_ECC_OPERAND_SIZE;
 		src.address += CCP_ECC_OPERAND_SIZE;
 	} else {
 	} else {
@@ -1739,7 +1773,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 
 
 	op.u.ecc.function = cmd->u.ecc.function;
 	op.u.ecc.function = cmd->u.ecc.function;
 
 
-	ret = cmd_q->ccp->vdata->perform->perform_ecc(&op);
+	ret = cmd_q->ccp->vdata->perform->ecc(&op);
 	if (ret) {
 	if (ret) {
 		cmd->engine_error = cmd_q->cmd_error;
 		cmd->engine_error = cmd_q->cmd_error;
 		goto e_dst;
 		goto e_dst;
@@ -1810,7 +1844,7 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 	cmd->engine_error = 0;
 	cmd->engine_error = 0;
 	cmd_q->cmd_error = 0;
 	cmd_q->cmd_error = 0;
 	cmd_q->int_rcvd = 0;
 	cmd_q->int_rcvd = 0;
-	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
 
 
 	switch (cmd->engine) {
 	switch (cmd->engine) {
 	case CCP_ENGINE_AES:
 	case CCP_ENGINE_AES:
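
The recurring s/perform->perform_xxx/perform->xxx/ changes in this file, together with the new get_free_slots, sballoc and sbfree hooks, all dispatch through a per-version table of function pointers reached via vdata->perform. A much-simplified stand-in for that pattern; the struct layout and names below are illustrative, not the driver's real definitions:

#include <stdio.h>

struct engine_ops {
	int (*sha)(int op);
	unsigned int (*get_free_slots)(void);
};

static int v3_sha(int op) { printf("v3 sha op %d\n", op); return 0; }
static unsigned int v3_free_slots(void) { return 16; }

static const struct engine_ops v3_ops = {
	.sha		= v3_sha,
	.get_free_slots	= v3_free_slots,
};

struct engine_vdata {
	unsigned int version;
	const struct engine_ops *perform;
};

static const struct engine_vdata vdata_v3 = { .version = 3, .perform = &v3_ops };

int main(void)
{
	const struct engine_vdata *vdata = &vdata_v3;

	/* common code never tests the version here; it just calls the hook */
	printf("free slots: %u\n", vdata->perform->get_free_slots());
	return vdata->perform->sha(1);
}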

+ 14 - 9
drivers/crypto/ccp/ccp-pci.c

@@ -4,6 +4,7 @@
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
  *
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
  *
  *
  * This program is free software; you can redistribute it and/or modify
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * it under the terms of the GNU General Public License version 2 as
@@ -25,9 +26,6 @@
 
 
 #include "ccp-dev.h"
 #include "ccp-dev.h"
 
 
-#define IO_BAR				2
-#define IO_OFFSET			0x20000
-
 #define MSIX_VECTORS			2
 #define MSIX_VECTORS			2
 
 
 struct ccp_msix {
 struct ccp_msix {
@@ -143,10 +141,11 @@ static void ccp_free_irqs(struct ccp_device *ccp)
 			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
 			free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
 				 dev);
 				 dev);
 		pci_disable_msix(pdev);
 		pci_disable_msix(pdev);
-	} else {
+	} else if (ccp->irq) {
 		free_irq(ccp->irq, dev);
 		free_irq(ccp->irq, dev);
 		pci_disable_msi(pdev);
 		pci_disable_msi(pdev);
 	}
 	}
+	ccp->irq = 0;
 }
 }
 
 
 static int ccp_find_mmio_area(struct ccp_device *ccp)
 static int ccp_find_mmio_area(struct ccp_device *ccp)
@@ -156,10 +155,11 @@ static int ccp_find_mmio_area(struct ccp_device *ccp)
 	resource_size_t io_len;
 	resource_size_t io_len;
 	unsigned long io_flags;
 	unsigned long io_flags;
 
 
-	io_flags = pci_resource_flags(pdev, IO_BAR);
-	io_len = pci_resource_len(pdev, IO_BAR);
-	if ((io_flags & IORESOURCE_MEM) && (io_len >= (IO_OFFSET + 0x800)))
-		return IO_BAR;
+	io_flags = pci_resource_flags(pdev, ccp->vdata->bar);
+	io_len = pci_resource_len(pdev, ccp->vdata->bar);
+	if ((io_flags & IORESOURCE_MEM) &&
+	    (io_len >= (ccp->vdata->offset + 0x800)))
+		return ccp->vdata->bar;
 
 
 	return -EIO;
 	return -EIO;
 }
 }
@@ -216,7 +216,7 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		dev_err(dev, "pci_iomap failed\n");
 		dev_err(dev, "pci_iomap failed\n");
 		goto e_device;
 		goto e_device;
 	}
 	}
-	ccp->io_regs = ccp->io_map + IO_OFFSET;
+	ccp->io_regs = ccp->io_map + ccp->vdata->offset;
 
 
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
 	if (ret) {
 	if (ret) {
@@ -230,6 +230,9 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 
 	dev_set_drvdata(dev, ccp);
 	dev_set_drvdata(dev, ccp);
 
 
+	if (ccp->vdata->setup)
+		ccp->vdata->setup(ccp);
+
 	ret = ccp->vdata->perform->init(ccp);
 	ret = ccp->vdata->perform->init(ccp);
 	if (ret)
 	if (ret)
 		goto e_iomap;
 		goto e_iomap;
@@ -322,6 +325,8 @@ static int ccp_pci_resume(struct pci_dev *pdev)
 
 
 static const struct pci_device_id ccp_pci_table[] = {
 static const struct pci_device_id ccp_pci_table[] = {
 	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
 	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
+	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5a },
+	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&ccpv5b },
 	/* Last entry must be zero */
 	/* Last entry must be zero */
 	{ 0, }
 	{ 0, }
 };
 };

+ 2 - 10
drivers/crypto/hifn_795x.c

@@ -636,20 +636,12 @@ struct hifn_request_context {
 
 
 static inline u32 hifn_read_0(struct hifn_device *dev, u32 reg)
 static inline u32 hifn_read_0(struct hifn_device *dev, u32 reg)
 {
 {
-	u32 ret;
-
-	ret = readl(dev->bar[0] + reg);
-
-	return ret;
+	return readl(dev->bar[0] + reg);
 }
 }
 
 
 static inline u32 hifn_read_1(struct hifn_device *dev, u32 reg)
 static inline u32 hifn_read_1(struct hifn_device *dev, u32 reg)
 {
 {
-	u32 ret;
-
-	ret = readl(dev->bar[1] + reg);
-
-	return ret;
+	return readl(dev->bar[1] + reg);
 }
 }
 
 
 static inline void hifn_write_0(struct hifn_device *dev, u32 reg, u32 val)
 static inline void hifn_write_0(struct hifn_device *dev, u32 reg, u32 val)

+ 98 - 10
drivers/crypto/img-hash.c

@@ -71,6 +71,7 @@
 #define DRIVER_FLAGS_MD5		BIT(21)
 #define DRIVER_FLAGS_MD5		BIT(21)
 
 
 #define IMG_HASH_QUEUE_LENGTH		20
 #define IMG_HASH_QUEUE_LENGTH		20
+#define IMG_HASH_DMA_BURST		4
 #define IMG_HASH_DMA_THRESHOLD		64
 #define IMG_HASH_DMA_THRESHOLD		64
 
 
 #ifdef __LITTLE_ENDIAN
 #ifdef __LITTLE_ENDIAN
@@ -102,8 +103,10 @@ struct img_hash_request_ctx {
 	unsigned long		op;
 	unsigned long		op;
 
 
 	size_t			bufcnt;
 	size_t			bufcnt;
-	u8 buffer[0] __aligned(sizeof(u32));
 	struct ahash_request	fallback_req;
 	struct ahash_request	fallback_req;
+
+	/* Zero length buffer must remain last member of struct */
+	u8 buffer[0] __aligned(sizeof(u32));
 };
 };
 
 
 struct img_hash_ctx {
 struct img_hash_ctx {
@@ -340,7 +343,7 @@ static int img_hash_dma_init(struct img_hash_dev *hdev)
 	dma_conf.direction = DMA_MEM_TO_DEV;
 	dma_conf.direction = DMA_MEM_TO_DEV;
 	dma_conf.dst_addr = hdev->bus_addr;
 	dma_conf.dst_addr = hdev->bus_addr;
 	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	dma_conf.dst_maxburst = 16;
+	dma_conf.dst_maxburst = IMG_HASH_DMA_BURST;
 	dma_conf.device_fc = false;
 	dma_conf.device_fc = false;
 
 
 	err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
 	err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
@@ -361,7 +364,7 @@ static void img_hash_dma_task(unsigned long d)
 	size_t nbytes, bleft, wsend, len, tbc;
 	size_t nbytes, bleft, wsend, len, tbc;
 	struct scatterlist tsg;
 	struct scatterlist tsg;
 
 
-	if (!ctx->sg)
+	if (!hdev->req || !ctx->sg)
 		return;
 		return;
 
 
 	addr = sg_virt(ctx->sg);
 	addr = sg_virt(ctx->sg);
@@ -587,6 +590,32 @@ static int img_hash_finup(struct ahash_request *req)
 	return crypto_ahash_finup(&rctx->fallback_req);
 	return crypto_ahash_finup(&rctx->fallback_req);
 }
 }
 
 
+static int img_hash_import(struct ahash_request *req, const void *in)
+{
+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+	rctx->fallback_req.base.flags = req->base.flags
+		& CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	return crypto_ahash_import(&rctx->fallback_req, in);
+}
+
+static int img_hash_export(struct ahash_request *req, void *out)
+{
+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+	rctx->fallback_req.base.flags = req->base.flags
+		& CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	return crypto_ahash_export(&rctx->fallback_req, out);
+}
+
 static int img_hash_digest(struct ahash_request *req)
 static int img_hash_digest(struct ahash_request *req)
 {
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -643,10 +672,9 @@ static int img_hash_digest(struct ahash_request *req)
 	return err;
 	return err;
 }
 }
 
 
-static int img_hash_cra_init(struct crypto_tfm *tfm)
+static int img_hash_cra_init(struct crypto_tfm *tfm, const char *alg_name)
 {
 {
 	struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
-	const char *alg_name = crypto_tfm_alg_name(tfm);
 	int err = -ENOMEM;
 	int err = -ENOMEM;
 
 
 	ctx->fallback = crypto_alloc_ahash(alg_name, 0,
 	ctx->fallback = crypto_alloc_ahash(alg_name, 0,
@@ -658,6 +686,7 @@ static int img_hash_cra_init(struct crypto_tfm *tfm)
 	}
 	}
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct img_hash_request_ctx) +
 				 sizeof(struct img_hash_request_ctx) +
+				 crypto_ahash_reqsize(ctx->fallback) +
 				 IMG_HASH_DMA_THRESHOLD);
 				 IMG_HASH_DMA_THRESHOLD);
 
 
 	return 0;
 	return 0;
@@ -666,6 +695,26 @@ err:
 	return err;
 	return err;
 }
 }
 
 
+static int img_hash_cra_md5_init(struct crypto_tfm *tfm)
+{
+	return img_hash_cra_init(tfm, "md5-generic");
+}
+
+static int img_hash_cra_sha1_init(struct crypto_tfm *tfm)
+{
+	return img_hash_cra_init(tfm, "sha1-generic");
+}
+
+static int img_hash_cra_sha224_init(struct crypto_tfm *tfm)
+{
+	return img_hash_cra_init(tfm, "sha224-generic");
+}
+
+static int img_hash_cra_sha256_init(struct crypto_tfm *tfm)
+{
+	return img_hash_cra_init(tfm, "sha256-generic");
+}
+
 static void img_hash_cra_exit(struct crypto_tfm *tfm)
 static void img_hash_cra_exit(struct crypto_tfm *tfm)
 {
 {
 	struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
 	struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
@@ -711,9 +760,12 @@ static struct ahash_alg img_algs[] = {
 		.update = img_hash_update,
 		.update = img_hash_update,
 		.final = img_hash_final,
 		.final = img_hash_final,
 		.finup = img_hash_finup,
 		.finup = img_hash_finup,
+		.export = img_hash_export,
+		.import = img_hash_import,
 		.digest = img_hash_digest,
 		.digest = img_hash_digest,
 		.halg = {
 		.halg = {
 			.digestsize = MD5_DIGEST_SIZE,
 			.digestsize = MD5_DIGEST_SIZE,
+			.statesize = sizeof(struct md5_state),
 			.base = {
 			.base = {
 				.cra_name = "md5",
 				.cra_name = "md5",
 				.cra_driver_name = "img-md5",
 				.cra_driver_name = "img-md5",
@@ -723,7 +775,7 @@ static struct ahash_alg img_algs[] = {
 				CRYPTO_ALG_NEED_FALLBACK,
 				CRYPTO_ALG_NEED_FALLBACK,
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
-				.cra_init = img_hash_cra_init,
+				.cra_init = img_hash_cra_md5_init,
 				.cra_exit = img_hash_cra_exit,
 				.cra_exit = img_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 				.cra_module = THIS_MODULE,
 			}
 			}
@@ -734,9 +786,12 @@ static struct ahash_alg img_algs[] = {
 		.update = img_hash_update,
 		.update = img_hash_update,
 		.final = img_hash_final,
 		.final = img_hash_final,
 		.finup = img_hash_finup,
 		.finup = img_hash_finup,
+		.export = img_hash_export,
+		.import = img_hash_import,
 		.digest = img_hash_digest,
 		.digest = img_hash_digest,
 		.halg = {
 		.halg = {
 			.digestsize = SHA1_DIGEST_SIZE,
 			.digestsize = SHA1_DIGEST_SIZE,
+			.statesize = sizeof(struct sha1_state),
 			.base = {
 			.base = {
 				.cra_name = "sha1",
 				.cra_name = "sha1",
 				.cra_driver_name = "img-sha1",
 				.cra_driver_name = "img-sha1",
@@ -746,7 +801,7 @@ static struct ahash_alg img_algs[] = {
 				CRYPTO_ALG_NEED_FALLBACK,
 				CRYPTO_ALG_NEED_FALLBACK,
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_blocksize = SHA1_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
-				.cra_init = img_hash_cra_init,
+				.cra_init = img_hash_cra_sha1_init,
 				.cra_exit = img_hash_cra_exit,
 				.cra_exit = img_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 				.cra_module = THIS_MODULE,
 			}
 			}
@@ -757,9 +812,12 @@ static struct ahash_alg img_algs[] = {
 		.update = img_hash_update,
 		.update = img_hash_update,
 		.final = img_hash_final,
 		.final = img_hash_final,
 		.finup = img_hash_finup,
 		.finup = img_hash_finup,
+		.export = img_hash_export,
+		.import = img_hash_import,
 		.digest = img_hash_digest,
 		.digest = img_hash_digest,
 		.halg = {
 		.halg = {
 			.digestsize = SHA224_DIGEST_SIZE,
 			.digestsize = SHA224_DIGEST_SIZE,
+			.statesize = sizeof(struct sha256_state),
 			.base = {
 			.base = {
 				.cra_name = "sha224",
 				.cra_name = "sha224",
 				.cra_driver_name = "img-sha224",
 				.cra_driver_name = "img-sha224",
@@ -769,7 +827,7 @@ static struct ahash_alg img_algs[] = {
 				CRYPTO_ALG_NEED_FALLBACK,
 				CRYPTO_ALG_NEED_FALLBACK,
 				.cra_blocksize = SHA224_BLOCK_SIZE,
 				.cra_blocksize = SHA224_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
-				.cra_init = img_hash_cra_init,
+				.cra_init = img_hash_cra_sha224_init,
 				.cra_exit = img_hash_cra_exit,
 				.cra_exit = img_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 				.cra_module = THIS_MODULE,
 			}
 			}
@@ -780,9 +838,12 @@ static struct ahash_alg img_algs[] = {
 		.update = img_hash_update,
 		.update = img_hash_update,
 		.final = img_hash_final,
 		.final = img_hash_final,
 		.finup = img_hash_finup,
 		.finup = img_hash_finup,
+		.export = img_hash_export,
+		.import = img_hash_import,
 		.digest = img_hash_digest,
 		.digest = img_hash_digest,
 		.halg = {
 		.halg = {
 			.digestsize = SHA256_DIGEST_SIZE,
 			.digestsize = SHA256_DIGEST_SIZE,
+			.statesize = sizeof(struct sha256_state),
 			.base = {
 			.base = {
 				.cra_name = "sha256",
 				.cra_name = "sha256",
 				.cra_driver_name = "img-sha256",
 				.cra_driver_name = "img-sha256",
@@ -792,7 +853,7 @@ static struct ahash_alg img_algs[] = {
 				CRYPTO_ALG_NEED_FALLBACK,
 				CRYPTO_ALG_NEED_FALLBACK,
 				.cra_blocksize = SHA256_BLOCK_SIZE,
 				.cra_blocksize = SHA256_BLOCK_SIZE,
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
 				.cra_ctxsize = sizeof(struct img_hash_ctx),
-				.cra_init = img_hash_cra_init,
+				.cra_init = img_hash_cra_sha256_init,
 				.cra_exit = img_hash_cra_exit,
 				.cra_exit = img_hash_cra_exit,
 				.cra_module = THIS_MODULE,
 				.cra_module = THIS_MODULE,
 			}
 			}
@@ -971,7 +1032,7 @@ static int img_hash_probe(struct platform_device *pdev)
 	err = img_register_algs(hdev);
 	err = img_register_algs(hdev);
 	if (err)
 	if (err)
 		goto err_algs;
 		goto err_algs;
-	dev_dbg(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
+	dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
 
 
 	return 0;
 	return 0;
 
 
@@ -1013,11 +1074,38 @@ static int img_hash_remove(struct platform_device *pdev)
 	return 0;
 	return 0;
 }
 }
 
 
+#ifdef CONFIG_PM_SLEEP
+static int img_hash_suspend(struct device *dev)
+{
+	struct img_hash_dev *hdev = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(hdev->hash_clk);
+	clk_disable_unprepare(hdev->sys_clk);
+
+	return 0;
+}
+
+static int img_hash_resume(struct device *dev)
+{
+	struct img_hash_dev *hdev = dev_get_drvdata(dev);
+
+	clk_prepare_enable(hdev->hash_clk);
+	clk_prepare_enable(hdev->sys_clk);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops img_hash_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(img_hash_suspend, img_hash_resume)
+};
+
 static struct platform_driver img_hash_driver = {
 static struct platform_driver img_hash_driver = {
 	.probe		= img_hash_probe,
 	.probe		= img_hash_probe,
 	.remove		= img_hash_remove,
 	.remove		= img_hash_remove,
 	.driver		= {
 	.driver		= {
 		.name	= "img-hash-accelerator",
 		.name	= "img-hash-accelerator",
+		.pm	= &img_hash_pm_ops,
 		.of_match_table	= of_match_ptr(img_hash_match),
 		.of_match_table	= of_match_ptr(img_hash_match),
 	}
 	}
 };
 };
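
The img-hash changes keep the zero-length buffer as the last member of the request context and grow the request size by crypto_ahash_reqsize(fallback); both follow from the buffer being tail storage allocated past the end of the struct. A userspace sketch of that layout, with EXTRA standing in for the fallback state plus the DMA threshold:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define EXTRA 64	/* stands in for fallback reqsize + threshold */

struct req_ctx {
	size_t bufcnt;
	/* must remain the last member: storage lives past the struct */
	unsigned char buffer[];
};

int main(void)
{
	struct req_ctx *ctx = malloc(sizeof(*ctx) + EXTRA);

	if (!ctx)
		return 1;
	ctx->bufcnt = 0;
	memset(ctx->buffer, 0, EXTRA);	/* tail bytes are only reachable via the trailing array */
	printf("ctx is %zu bytes plus %d tail bytes\n", sizeof(*ctx), EXTRA);
	free(ctx);
	return 0;
}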

+ 5 - 4
drivers/crypto/ixp4xx_crypto.c

@@ -447,9 +447,8 @@ static int init_ixp_crypto(struct device *dev)
 
 	if (!npe_running(npe_c)) {
 		ret = npe_load_firmware(npe_c, npe_name(npe_c), dev);
-		if (ret) {
-			return ret;
-		}
+		if (ret)
+			goto npe_release;
 		if (npe_recv_message(npe_c, msg, "STATUS_MSG"))
 			goto npe_error;
 	} else {
@@ -473,7 +472,8 @@ static int init_ixp_crypto(struct device *dev)
 	default:
 		printk(KERN_ERR "Firmware of %s lacks crypto support\n",
 			npe_name(npe_c));
-		return -ENODEV;
+		ret = -ENODEV;
+		goto npe_release;
 	}
 	/* buffer_pool will also be used to sometimes store the hmac,
 	 * so assure it is large enough
@@ -512,6 +512,7 @@ npe_error:
 err:
 	dma_pool_destroy(ctx_pool);
 	dma_pool_destroy(buffer_pool);
+npe_release:
 	npe_release(npe_c);
 	return ret;
 }
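
The ixp4xx fix above is the usual goto-unwind correction: once the NPE is held, failure paths jump to a release label instead of returning directly. A stripped-down sketch of the pattern with placeholder acquire/load/release helpers:

#include <stdio.h>

static int acquire(void) { printf("acquired\n"); return 0; }
static void release(void) { printf("released\n"); }
static int load(int fail) { return fail ? -1 : 0; }

static int init(int fail_load)
{
	int ret = acquire();

	if (ret)
		return ret;		/* nothing held yet, plain return is fine */

	ret = load(fail_load);
	if (ret)
		goto out_release;	/* the held resource must be dropped */

	return 0;

out_release:
	release();
	return ret;
}

int main(void)
{
	return init(1) ? 0 : 1;		/* the failure path still prints "released" */
}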

+ 1 - 0
drivers/crypto/marvell/cesa.c

@@ -166,6 +166,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv)
 			if (!req)
 				break;
 
+			ctx = crypto_tfm_ctx(req->tfm);
 			mv_cesa_complete_req(ctx, req, 0);
 		}
 	}
 
+ 21 - 23
drivers/crypto/marvell/hash.c

@@ -374,7 +374,7 @@ static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = {
 	.complete = mv_cesa_ahash_complete,
 	.complete = mv_cesa_ahash_complete,
 };
 };
 
 
-static int mv_cesa_ahash_init(struct ahash_request *req,
+static void mv_cesa_ahash_init(struct ahash_request *req,
 			      struct mv_cesa_op_ctx *tmpl, bool algo_le)
 			      struct mv_cesa_op_ctx *tmpl, bool algo_le)
 {
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
@@ -390,8 +390,6 @@ static int mv_cesa_ahash_init(struct ahash_request *req,
 	creq->op_tmpl = *tmpl;
 	creq->op_tmpl = *tmpl;
 	creq->len = 0;
 	creq->len = 0;
 	creq->algo_le = algo_le;
 	creq->algo_le = algo_le;
-
-	return 0;
 }
 }
 
 
 static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
 static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
@@ -405,15 +403,16 @@ static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
 	return 0;
 	return 0;
 }
 }
 
 
-static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
+static bool mv_cesa_ahash_cache_req(struct ahash_request *req)
 {
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	bool cached = false;
 
 
-	if (creq->cache_ptr + req->nbytes < 64 && !creq->last_req) {
-		*cached = true;
+	if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && !creq->last_req) {
+		cached = true;
 
 
 		if (!req->nbytes)
 		if (!req->nbytes)
-			return 0;
+			return cached;
 
 
 		sg_pcopy_to_buffer(req->src, creq->src_nents,
 		sg_pcopy_to_buffer(req->src, creq->src_nents,
 				   creq->cache + creq->cache_ptr,
 				   creq->cache + creq->cache_ptr,
@@ -422,7 +421,7 @@ static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
 		creq->cache_ptr += req->nbytes;
 		creq->cache_ptr += req->nbytes;
 	}
 	}
 
 
-	return 0;
+	return cached;
 }
 }
 
 
 static struct mv_cesa_op_ctx *
 static struct mv_cesa_op_ctx *
@@ -455,7 +454,6 @@ mv_cesa_dma_add_frag(struct mv_cesa_tdma_chain *chain,
 
 
 static int
 static int
 mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
 mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
-			    struct mv_cesa_ahash_dma_iter *dma_iter,
 			    struct mv_cesa_ahash_req *creq,
 			    struct mv_cesa_ahash_req *creq,
 			    gfp_t flags)
 			    gfp_t flags)
 {
 {
@@ -586,7 +584,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	 * Add the cache (left-over data from a previous block) first.
 	 * Add the cache (left-over data from a previous block) first.
 	 * This will never overflow the SRAM size.
 	 * This will never overflow the SRAM size.
 	 */
 	 */
-	ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags);
+	ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, creq, flags);
 	if (ret)
 	if (ret)
 		goto err_free_tdma;
 		goto err_free_tdma;
 
 
@@ -668,7 +666,6 @@ err:
 static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
 static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
 {
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	int ret;
 
 
 	creq->src_nents = sg_nents_for_len(req->src, req->nbytes);
 	creq->src_nents = sg_nents_for_len(req->src, req->nbytes);
 	if (creq->src_nents < 0) {
 	if (creq->src_nents < 0) {
@@ -676,17 +673,15 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
 		return creq->src_nents;
 		return creq->src_nents;
 	}
 	}
 
 
-	ret = mv_cesa_ahash_cache_req(req, cached);
-	if (ret)
-		return ret;
+	*cached = mv_cesa_ahash_cache_req(req);
 
 
 	if (*cached)
 	if (*cached)
 		return 0;
 		return 0;
 
 
 	if (cesa_dev->caps->has_tdma)
 	if (cesa_dev->caps->has_tdma)
-		ret = mv_cesa_ahash_dma_req_init(req);
-
-	return ret;
+		return mv_cesa_ahash_dma_req_init(req);
+	else
+		return 0;
 }
 }
 
 
 static int mv_cesa_ahash_queue_req(struct ahash_request *req)
 static int mv_cesa_ahash_queue_req(struct ahash_request *req)
@@ -805,13 +800,14 @@ static int mv_cesa_md5_init(struct ahash_request *req)
 	struct mv_cesa_op_ctx tmpl = { };
 	struct mv_cesa_op_ctx tmpl = { };
 
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+
+	mv_cesa_ahash_init(req, &tmpl, true);
+
 	creq->state[0] = MD5_H0;
 	creq->state[0] = MD5_H0;
 	creq->state[1] = MD5_H1;
 	creq->state[1] = MD5_H1;
 	creq->state[2] = MD5_H2;
 	creq->state[2] = MD5_H2;
 	creq->state[3] = MD5_H3;
 	creq->state[3] = MD5_H3;
 
 
-	mv_cesa_ahash_init(req, &tmpl, true);
-
 	return 0;
 	return 0;
 }
 }
 
 
@@ -873,14 +869,15 @@ static int mv_cesa_sha1_init(struct ahash_request *req)
 	struct mv_cesa_op_ctx tmpl = { };
 	struct mv_cesa_op_ctx tmpl = { };
 
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
+
+	mv_cesa_ahash_init(req, &tmpl, false);
+
 	creq->state[0] = SHA1_H0;
 	creq->state[0] = SHA1_H0;
 	creq->state[1] = SHA1_H1;
 	creq->state[1] = SHA1_H1;
 	creq->state[2] = SHA1_H2;
 	creq->state[2] = SHA1_H2;
 	creq->state[3] = SHA1_H3;
 	creq->state[3] = SHA1_H3;
 	creq->state[4] = SHA1_H4;
 	creq->state[4] = SHA1_H4;
 
 
-	mv_cesa_ahash_init(req, &tmpl, false);
-
 	return 0;
 	return 0;
 }
 }
 
 
@@ -942,6 +939,9 @@ static int mv_cesa_sha256_init(struct ahash_request *req)
 	struct mv_cesa_op_ctx tmpl = { };
 	struct mv_cesa_op_ctx tmpl = { };
 
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
+
+	mv_cesa_ahash_init(req, &tmpl, false);
+
 	creq->state[0] = SHA256_H0;
 	creq->state[0] = SHA256_H0;
 	creq->state[1] = SHA256_H1;
 	creq->state[1] = SHA256_H1;
 	creq->state[2] = SHA256_H2;
 	creq->state[2] = SHA256_H2;
@@ -951,8 +951,6 @@ static int mv_cesa_sha256_init(struct ahash_request *req)
 	creq->state[6] = SHA256_H6;
 	creq->state[6] = SHA256_H6;
 	creq->state[7] = SHA256_H7;
 	creq->state[7] = SHA256_H7;
 
 
-	mv_cesa_ahash_init(req, &tmpl, false);
-
 	return 0;
 	return 0;
 }
 }
 
 

+ 1 - 0
drivers/crypto/marvell/tdma.c

@@ -261,6 +261,7 @@ struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
 	tdma->op = op;
 	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
 	tdma->src = cpu_to_le32(dma_handle);
+	tdma->dst = CESA_SA_CFG_SRAM_OFFSET;
 	tdma->flags = CESA_TDMA_DST_IN_SRAM | CESA_TDMA_OP;
 
 	return op;

+ 2 - 5
drivers/crypto/mv_cesa.c

@@ -1091,11 +1091,8 @@ static int mv_probe(struct platform_device *pdev)
 
 	cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
 
-	if (pdev->dev.of_node)
-		irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
-	else
-		irq = platform_get_irq(pdev, 0);
-	if (irq < 0 || irq == NO_IRQ) {
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
		ret = irq;
		goto err;
	}

+ 3 - 1
drivers/crypto/mxc-scc.c

@@ -668,7 +668,9 @@ static int mxc_scc_probe(struct platform_device *pdev)
 		return PTR_ERR(scc->clk);
 	}
 
-	clk_prepare_enable(scc->clk);
+	ret = clk_prepare_enable(scc->clk);
+	if (ret)
+		return ret;
 
 	/* clear error status register */
 	writel(0x0, scc->base + SCC_SCM_ERROR_STATUS);
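
The mxc-scc hunk is the standard "don't ignore the return value" fix for clk_prepare_enable(). The same shape in a stand-alone sketch, with enable() as a placeholder for the clock call:

#include <stdio.h>

static int enable(int fail) { return fail ? -5 : 0; }	/* errno-style failure code */

static int probe(int fail)
{
	int ret = enable(fail);

	if (ret)
		return ret;	/* propagate instead of silently continuing */

	printf("device ready\n");
	return 0;
}

int main(void)
{
	return probe(0);
}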

+ 86 - 55
drivers/crypto/omap-aes.c

@@ -35,7 +35,8 @@
 #include <linux/interrupt.h>
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/aes.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
+#include <crypto/engine.h>
+#include <crypto/internal/skcipher.h>
 
 
 #define DST_MAXBURST			4
 #define DST_MAXBURST			4
 #define DMA_MIN				(DST_MAXBURST * sizeof(u32))
 #define DMA_MIN				(DST_MAXBURST * sizeof(u32))
@@ -85,6 +86,8 @@
 #define AES_REG_IRQ_DATA_OUT           BIT(2)
 #define AES_REG_IRQ_DATA_OUT           BIT(2)
 #define DEFAULT_TIMEOUT		(5*HZ)
 #define DEFAULT_TIMEOUT		(5*HZ)
 
 
+#define DEFAULT_AUTOSUSPEND_DELAY	1000
+
 #define FLAGS_MODE_MASK		0x000f
 #define FLAGS_MODE_MASK		0x000f
 #define FLAGS_ENCRYPT		BIT(0)
 #define FLAGS_ENCRYPT		BIT(0)
 #define FLAGS_CBC		BIT(1)
 #define FLAGS_CBC		BIT(1)
@@ -103,6 +106,7 @@ struct omap_aes_ctx {
 	int		keylen;
 	int		keylen;
 	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
 	u32		key[AES_KEYSIZE_256 / sizeof(u32)];
 	unsigned long	flags;
 	unsigned long	flags;
+	struct crypto_skcipher	*fallback;
 };
 };
 
 
 struct omap_aes_reqctx {
 struct omap_aes_reqctx {
@@ -238,11 +242,19 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
 
 
 static int omap_aes_hw_init(struct omap_aes_dev *dd)
 static int omap_aes_hw_init(struct omap_aes_dev *dd)
 {
 {
+	int err;
+
 	if (!(dd->flags & FLAGS_INIT)) {
 	if (!(dd->flags & FLAGS_INIT)) {
 		dd->flags |= FLAGS_INIT;
 		dd->flags |= FLAGS_INIT;
 		dd->err = 0;
 		dd->err = 0;
 	}
 	}
 
 
+	err = pm_runtime_get_sync(dd->dev);
+	if (err < 0) {
+		dev_err(dd->dev, "failed to get sync: %d\n", err);
+		return err;
+	}
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -319,20 +331,12 @@ static void omap_aes_dma_stop(struct omap_aes_dev *dd)
 
 
 static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
 static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
 {
 {
-	struct omap_aes_dev *dd = NULL, *tmp;
+	struct omap_aes_dev *dd;
 
 
 	spin_lock_bh(&list_lock);
 	spin_lock_bh(&list_lock);
-	if (!ctx->dd) {
-		list_for_each_entry(tmp, &dev_list, list) {
-			/* FIXME: take fist available aes core */
-			dd = tmp;
-			break;
-		}
-		ctx->dd = dd;
-	} else {
-		/* already found before */
-		dd = ctx->dd;
-	}
+	dd = list_first_entry(&dev_list, struct omap_aes_dev, list);
+	list_move_tail(&dd->list, &dev_list);
+	ctx->dd = dd;
 	spin_unlock_bh(&list_lock);
 	spin_unlock_bh(&list_lock);
 
 
 	return dd;
 	return dd;
@@ -519,7 +523,10 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
 
 	pr_debug("err: %d\n", err);
 	pr_debug("err: %d\n", err);
 
 
-	crypto_finalize_request(dd->engine, req, err);
+	crypto_finalize_cipher_request(dd->engine, req, err);
+
+	pm_runtime_mark_last_busy(dd->dev);
+	pm_runtime_put_autosuspend(dd->dev);
 }
 }
 
 
 static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
@@ -592,7 +599,7 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
 				 struct ablkcipher_request *req)
 				 struct ablkcipher_request *req)
 {
 {
 	if (req)
 	if (req)
-		return crypto_transfer_request_to_engine(dd->engine, req);
+		return crypto_transfer_cipher_request_to_engine(dd->engine, req);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -602,7 +609,7 @@ static int omap_aes_prepare_req(struct crypto_engine *engine,
 {
 {
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
 			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_dev *dd = omap_aes_find_dev(ctx);
+	struct omap_aes_dev *dd = ctx->dd;
 	struct omap_aes_reqctx *rctx;
 	struct omap_aes_reqctx *rctx;
 
 
 	if (!dd)
 	if (!dd)
@@ -648,7 +655,7 @@ static int omap_aes_crypt_req(struct crypto_engine *engine,
 {
 {
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(
 			crypto_ablkcipher_reqtfm(req));
 			crypto_ablkcipher_reqtfm(req));
-	struct omap_aes_dev *dd = omap_aes_find_dev(ctx);
+	struct omap_aes_dev *dd = ctx->dd;
 
 
 	if (!dd)
 	if (!dd)
 		return -ENODEV;
 		return -ENODEV;
@@ -696,11 +703,29 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
 			crypto_ablkcipher_reqtfm(req));
 			crypto_ablkcipher_reqtfm(req));
 	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
 	struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req);
 	struct omap_aes_dev *dd;
 	struct omap_aes_dev *dd;
+	int ret;
 
 
 	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
 	pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes,
 		  !!(mode & FLAGS_ENCRYPT),
 		  !!(mode & FLAGS_ENCRYPT),
 		  !!(mode & FLAGS_CBC));
 		  !!(mode & FLAGS_CBC));
 
 
+	if (req->nbytes < 200) {
+		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback);
+
+		skcipher_request_set_tfm(subreq, ctx->fallback);
+		skcipher_request_set_callback(subreq, req->base.flags, NULL,
+					      NULL);
+		skcipher_request_set_crypt(subreq, req->src, req->dst,
+					   req->nbytes, req->info);
+
+		if (mode & FLAGS_ENCRYPT)
+			ret = crypto_skcipher_encrypt(subreq);
+		else
+			ret = crypto_skcipher_decrypt(subreq);
+
+		skcipher_request_zero(subreq);
+		return ret;
+	}
 	dd = omap_aes_find_dev(ctx);
 	dd = omap_aes_find_dev(ctx);
 	if (!dd)
 	if (!dd)
 		return -ENODEV;
 		return -ENODEV;
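
The hunk above short-circuits small requests (under 200 bytes) to the software skcipher fallback instead of the accelerator, presumably because per-request setup costs dominate for short payloads. The dispatch shape in isolation, with placeholder encrypt functions and the same 200-byte threshold:

#include <stddef.h>
#include <stdio.h>

#define HW_THRESHOLD 200	/* matches the check in omap_aes_crypt() */

static int sw_encrypt(size_t len) { printf("sw path: %zu\n", len); return 0; }
static int hw_encrypt(size_t len) { printf("hw path: %zu\n", len); return 0; }

static int encrypt(size_t len)
{
	if (len < HW_THRESHOLD)
		return sw_encrypt(len);	/* small payloads take the software path */
	return hw_encrypt(len);
}

int main(void)
{
	encrypt(16);	/* falls back to software */
	encrypt(4096);	/* uses the accelerator */
	return 0;
}
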
@@ -716,6 +741,7 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 			   unsigned int keylen)
 {
 {
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	int ret;
 
 
 	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
 	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
 		   keylen != AES_KEYSIZE_256)
 		   keylen != AES_KEYSIZE_256)
@@ -726,6 +752,14 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	memcpy(ctx->key, key, keylen);
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
 	ctx->keylen = keylen;
 
 
+	crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags &
+						 CRYPTO_TFM_REQ_MASK);
+
+	ret = crypto_skcipher_setkey(ctx->fallback, key, keylen);
+	if (!ret)
+		return 0;
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -761,22 +795,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
 
 
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
 {
-	struct omap_aes_dev *dd = NULL;
-	int err;
+	const char *name = crypto_tfm_alg_name(tfm);
+	const u32 flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK;
+	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_skcipher *blk;
 
 
-	/* Find AES device, currently picks the first device */
-	spin_lock_bh(&list_lock);
-	list_for_each_entry(dd, &dev_list, list) {
-		break;
-	}
-	spin_unlock_bh(&list_lock);
+	blk = crypto_alloc_skcipher(name, 0, flags);
+	if (IS_ERR(blk))
+		return PTR_ERR(blk);
 
 
-	err = pm_runtime_get_sync(dd->dev);
-	if (err < 0) {
-		dev_err(dd->dev, "%s: failed to get_sync(%d)\n",
-			__func__, err);
-		return err;
-	}
+	ctx->fallback = blk;
 
 
 	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
 	tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
 
 
@@ -785,16 +813,12 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
 
 
 static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 {
 {
-	struct omap_aes_dev *dd = NULL;
+	struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 
-	/* Find AES device, currently picks the first device */
-	spin_lock_bh(&list_lock);
-	list_for_each_entry(dd, &dev_list, list) {
-		break;
-	}
-	spin_unlock_bh(&list_lock);
+	if (ctx->fallback)
+		crypto_free_skcipher(ctx->fallback);
 
 
-	pm_runtime_put_sync(dd->dev);
+	ctx->fallback = NULL;
 }
 }
 
 
 /* ********************** ALGS ************************************ */
 /* ********************** ALGS ************************************ */
@@ -806,7 +830,7 @@ static struct crypto_alg algs_ecb_cbc[] = {
 	.cra_priority		= 300,
 	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
+				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
 	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
 	.cra_alignmask		= 0,
 	.cra_alignmask		= 0,
@@ -828,7 +852,7 @@ static struct crypto_alg algs_ecb_cbc[] = {
 	.cra_priority		= 300,
 	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
+				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
 	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
 	.cra_alignmask		= 0,
 	.cra_alignmask		= 0,
@@ -854,7 +878,7 @@ static struct crypto_alg algs_ctr[] = {
 	.cra_priority		= 300,
 	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
-				  CRYPTO_ALG_ASYNC,
+				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
 	.cra_ctxsize		= sizeof(struct omap_aes_ctx),
 	.cra_alignmask		= 0,
 	.cra_alignmask		= 0,
@@ -1140,6 +1164,9 @@ static int omap_aes_probe(struct platform_device *pdev)
 	}
 	}
 	dd->phys_base = res.start;
 	dd->phys_base = res.start;
 
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY);
+
 	pm_runtime_enable(dev);
 	pm_runtime_enable(dev);
 	err = pm_runtime_get_sync(dev);
 	err = pm_runtime_get_sync(dev);
 	if (err < 0) {
 	if (err < 0) {
@@ -1186,6 +1213,19 @@ static int omap_aes_probe(struct platform_device *pdev)
 	list_add_tail(&dd->list, &dev_list);
 	list_add_tail(&dd->list, &dev_list);
 	spin_unlock(&list_lock);
 	spin_unlock(&list_lock);
 
 
+	/* Initialize crypto engine */
+	dd->engine = crypto_engine_alloc_init(dev, 1);
+	if (!dd->engine) {
+		err = -ENOMEM;
+		goto err_engine;
+	}
+
+	dd->engine->prepare_cipher_request = omap_aes_prepare_req;
+	dd->engine->cipher_one_request = omap_aes_crypt_req;
+	err = crypto_engine_start(dd->engine);
+	if (err)
+		goto err_engine;
+
 	for (i = 0; i < dd->pdata->algs_info_size; i++) {
 	for (i = 0; i < dd->pdata->algs_info_size; i++) {
 		if (!dd->pdata->algs_info[i].registered) {
 		if (!dd->pdata->algs_info[i].registered) {
 			for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
 			for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
@@ -1203,26 +1243,17 @@ static int omap_aes_probe(struct platform_device *pdev)
 		}
 		}
 	}
 	}
 
 
-	/* Initialize crypto engine */
-	dd->engine = crypto_engine_alloc_init(dev, 1);
-	if (!dd->engine)
-		goto err_algs;
-
-	dd->engine->prepare_request = omap_aes_prepare_req;
-	dd->engine->crypt_one_request = omap_aes_crypt_req;
-	err = crypto_engine_start(dd->engine);
-	if (err)
-		goto err_engine;
-
 	return 0;
 	return 0;
-err_engine:
-	crypto_engine_exit(dd->engine);
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
 			crypto_unregister_alg(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
+err_engine:
+	if (dd->engine)
+		crypto_engine_exit(dd->engine);
+
 	omap_aes_dma_cleanup(dd);
 err_irq:
 	tasklet_kill(&dd->done_task);

+ 20 - 15
drivers/crypto/omap-des.c

@@ -39,6 +39,7 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/des.h>
 #include <crypto/algapi.h>
+#include <crypto/engine.h>
 
 #define DST_MAXBURST			2
 
@@ -506,7 +507,7 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err)
 	pr_debug("err: %d\n", err);

 	pm_runtime_put(dd->dev);
-	crypto_finalize_request(dd->engine, req, err);
+	crypto_finalize_cipher_request(dd->engine, req, err);
 }

 static int omap_des_crypt_dma_stop(struct omap_des_dev *dd)
@@ -574,7 +575,7 @@ static int omap_des_handle_queue(struct omap_des_dev *dd,
 				 struct ablkcipher_request *req)
 {
 	if (req)
-		return crypto_transfer_request_to_engine(dd->engine, req);
+		return crypto_transfer_cipher_request_to_engine(dd->engine, req);
 
 	return 0;
 }
@@ -1078,6 +1079,19 @@ static int omap_des_probe(struct platform_device *pdev)
 	list_add_tail(&dd->list, &dev_list);
 	spin_unlock(&list_lock);
 
+	/* Initialize des crypto engine */
+	dd->engine = crypto_engine_alloc_init(dev, 1);
+	if (!dd->engine) {
+		err = -ENOMEM;
+		goto err_engine;
+	}
+
+	dd->engine->prepare_cipher_request = omap_des_prepare_req;
+	dd->engine->cipher_one_request = omap_des_crypt_req;
+	err = crypto_engine_start(dd->engine);
+	if (err)
+		goto err_engine;
+
 	for (i = 0; i < dd->pdata->algs_info_size; i++) {
 		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
 			algp = &dd->pdata->algs_info[i].algs_list[j];
@@ -1093,27 +1107,18 @@ static int omap_des_probe(struct platform_device *pdev)
 		}
 	}
 
-	/* Initialize des crypto engine */
-	dd->engine = crypto_engine_alloc_init(dev, 1);
-	if (!dd->engine)
-		goto err_algs;
-
-	dd->engine->prepare_request = omap_des_prepare_req;
-	dd->engine->crypt_one_request = omap_des_crypt_req;
-	err = crypto_engine_start(dd->engine);
-	if (err)
-		goto err_engine;
-
 	return 0;
 
-err_engine:
-	crypto_engine_exit(dd->engine);
 err_algs:
 	for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
 			crypto_unregister_alg(
 					&dd->pdata->algs_info[i].algs_list[j]);
 
+err_engine:
+	if (dd->engine)
+		crypto_engine_exit(dd->engine);
+
 	omap_des_dma_cleanup(dd);
 err_irq:
 	tasklet_kill(&dd->done_task);

+ 342 - 226
drivers/crypto/omap-sham.c

@@ -112,9 +112,10 @@
 #define FLAGS_DMA_READY		6
 #define FLAGS_AUTO_XOR		7
 #define FLAGS_BE32_SHA1		8
+#define FLAGS_SGS_COPIED	9
+#define FLAGS_SGS_ALLOCED	10
 /* context flags */
 #define FLAGS_FINUP		16
-#define FLAGS_SG		17
 
 #define FLAGS_MODE_SHIFT	18
 #define FLAGS_MODE_MASK		(SHA_REG_MODE_ALGO_MASK	<< FLAGS_MODE_SHIFT)
@@ -134,7 +135,8 @@
 #define OMAP_ALIGN_MASK		(sizeof(u32)-1)
 #define OMAP_ALIGNED		__attribute__((aligned(sizeof(u32))))
 
-#define BUFLEN			PAGE_SIZE
+#define BUFLEN			SHA512_BLOCK_SIZE
+#define OMAP_SHA_DMA_THRESHOLD	256
 
 struct omap_sham_dev;
 
@@ -147,12 +149,12 @@ struct omap_sham_reqctx {
 	size_t			digcnt;
 	size_t			bufcnt;
 	size_t			buflen;
-	dma_addr_t		dma_addr;
 
 	/* walk state */
 	struct scatterlist	*sg;
-	struct scatterlist	sgl;
-	unsigned int		offset;	/* offset in current sg */
+	struct scatterlist	sgl[2];
+	int			offset;	/* offset in current sg */
+	int			sg_len;
 	unsigned int		total;	/* total request */

 	u8			buffer[0] OMAP_ALIGNED;
@@ -223,6 +225,7 @@ struct omap_sham_dev {
 	struct dma_chan		*dma_lch;
 	struct tasklet_struct	done_task;
 	u8			polling_mode;
+	u8			xmit_buf[BUFLEN];
 
 	unsigned long		flags;
 	struct crypto_queue	queue;
@@ -510,12 +513,14 @@ static int omap_sham_poll_irq_omap4(struct omap_sham_dev *dd)
 			      SHA_REG_IRQSTATUS_INPUT_RDY);
 }
 
-static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
-			      size_t length, int final)
+static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, size_t length,
+			      int final)
 {
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
 	int count, len32, bs32, offset = 0;
-	const u32 *buffer = (const u32 *)buf;
+	const u32 *buffer;
+	int mlen;
+	struct sg_mapping_iter mi;
 
 	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
 						ctx->digcnt, length, final);
@@ -525,6 +530,7 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
 
 	/* should be non-zero before next lines to disable clocks later */
 	ctx->digcnt += length;
+	ctx->total -= length;
 
 	if (final)
 		set_bit(FLAGS_FINAL, &dd->flags); /* catch last interrupt */
@@ -534,16 +540,35 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
 	len32 = DIV_ROUND_UP(length, sizeof(u32));
 	bs32 = get_block_size(ctx) / sizeof(u32);
 
+	sg_miter_start(&mi, ctx->sg, ctx->sg_len,
+		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+
+	mlen = 0;
+
 	while (len32) {
 		if (dd->pdata->poll_irq(dd))
 			return -ETIMEDOUT;
 
-		for (count = 0; count < min(len32, bs32); count++, offset++)
+		for (count = 0; count < min(len32, bs32); count++, offset++) {
+			if (!mlen) {
+				sg_miter_next(&mi);
+				mlen = mi.length;
+				if (!mlen) {
+					pr_err("sg miter failure.\n");
+					return -EINVAL;
+				}
+				offset = 0;
+				buffer = mi.addr;
+			}
 			omap_sham_write(dd, SHA_REG_DIN(dd, count),
 					buffer[offset]);
+			mlen -= 4;
+		}
 		len32 -= min(len32, bs32);
 	}
 
+	sg_miter_stop(&mi);
+
 	return -EINPROGRESS;
 }
 
@@ -555,22 +580,27 @@ static void omap_sham_dma_callback(void *param)
 	tasklet_schedule(&dd->done_task);
 }
 
-static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
-			      size_t length, int final, int is_sg)
+static int omap_sham_xmit_dma(struct omap_sham_dev *dd, size_t length,
+			      int final)
 {
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
 	struct dma_async_tx_descriptor *tx;
 	struct dma_slave_config cfg;
-	int len32, ret, dma_min = get_block_size(ctx);
+	int ret;
 
 	dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
 						ctx->digcnt, length, final);
 
+	if (!dma_map_sg(dd->dev, ctx->sg, ctx->sg_len, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg error\n");
+		return -EINVAL;
+	}
+
 	memset(&cfg, 0, sizeof(cfg));

 	cfg.dst_addr = dd->phys_base + SHA_REG_DIN(dd, 0);
 	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	cfg.dst_maxburst = dma_min / DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_maxburst = get_block_size(ctx) / DMA_SLAVE_BUSWIDTH_4_BYTES;
 
 	ret = dmaengine_slave_config(dd->dma_lch, &cfg);
 	if (ret) {
@@ -578,30 +608,12 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
 		return ret;
 	}
 
-	len32 = DIV_ROUND_UP(length, dma_min) * dma_min;
-
-	if (is_sg) {
-		/*
-		 * The SG entry passed in may not have the 'length' member
-		 * set correctly so use a local SG entry (sgl) with the
-		 * proper value for 'length' instead.  If this is not done,
-		 * the dmaengine may try to DMA the incorrect amount of data.
-		 */
-		sg_init_table(&ctx->sgl, 1);
-		sg_assign_page(&ctx->sgl, sg_page(ctx->sg));
-		ctx->sgl.offset = ctx->sg->offset;
-		sg_dma_len(&ctx->sgl) = len32;
-		sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg);
-
-		tx = dmaengine_prep_slave_sg(dd->dma_lch, &ctx->sgl, 1,
-			DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	} else {
-		tx = dmaengine_prep_slave_single(dd->dma_lch, dma_addr, len32,
-			DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	}
+	tx = dmaengine_prep_slave_sg(dd->dma_lch, ctx->sg, ctx->sg_len,
+				     DMA_MEM_TO_DEV,
+				     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
 	if (!tx) {
-		dev_err(dd->dev, "prep_slave_sg/single() failed\n");
+		dev_err(dd->dev, "prep_slave_sg failed\n");
 		return -EINVAL;
 	}
 
@@ -611,6 +623,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
 	dd->pdata->write_ctrl(dd, length, final, 1);

 	ctx->digcnt += length;
+	ctx->total -= length;
 
 	if (final)
 		set_bit(FLAGS_FINAL, &dd->flags); /* catch last interrupt */
@@ -625,189 +638,257 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
 	return -EINPROGRESS;
 }
 
-static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx,
-				const u8 *data, size_t length)
+static int omap_sham_copy_sg_lists(struct omap_sham_reqctx *ctx,
+				   struct scatterlist *sg, int bs, int new_len)
 {
-	size_t count = min(length, ctx->buflen - ctx->bufcnt);
+	int n = sg_nents(sg);
+	struct scatterlist *tmp;
+	int offset = ctx->offset;
 
-	count = min(count, ctx->total);
-	if (count <= 0)
-		return 0;
-	memcpy(ctx->buffer + ctx->bufcnt, data, count);
-	ctx->bufcnt += count;
+	if (ctx->bufcnt)
+		n++;
 
-	return count;
-}
+	ctx->sg = kmalloc_array(n, sizeof(*sg), GFP_KERNEL);
+	if (!ctx->sg)
+		return -ENOMEM;
 
-static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx)
-{
-	size_t count;
-	const u8 *vaddr;
+	sg_init_table(ctx->sg, n);
 
-	while (ctx->sg) {
-		vaddr = kmap_atomic(sg_page(ctx->sg));
-		vaddr += ctx->sg->offset;
+	tmp = ctx->sg;
 
-		count = omap_sham_append_buffer(ctx,
-				vaddr + ctx->offset,
-				ctx->sg->length - ctx->offset);
+	ctx->sg_len = 0;
 
-		kunmap_atomic((void *)vaddr);
+	if (ctx->bufcnt) {
+		sg_set_buf(tmp, ctx->dd->xmit_buf, ctx->bufcnt);
+		tmp = sg_next(tmp);
+		ctx->sg_len++;
+	}
 
-		if (!count)
-			break;
-		ctx->offset += count;
-		ctx->total -= count;
-		if (ctx->offset == ctx->sg->length) {
-			ctx->sg = sg_next(ctx->sg);
-			if (ctx->sg)
-				ctx->offset = 0;
-			else
-				ctx->total = 0;
+	while (sg && new_len) {
+		int len = sg->length - offset;
+
+		if (offset) {
+			offset -= sg->length;
+			if (offset < 0)
+				offset = 0;
+		}
+
+		if (new_len < len)
+			len = new_len;
+
+		if (len > 0) {
+			new_len -= len;
+			sg_set_page(tmp, sg_page(sg), len, sg->offset);
+			if (new_len <= 0)
+				sg_mark_end(tmp);
+			tmp = sg_next(tmp);
+			ctx->sg_len++;
 		}
+
+		sg = sg_next(sg);
 	}
 
+	set_bit(FLAGS_SGS_ALLOCED, &ctx->dd->flags);
+
+	ctx->bufcnt = 0;
+
 	return 0;
 }
 
-static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd,
-					struct omap_sham_reqctx *ctx,
-					size_t length, int final)
+static int omap_sham_copy_sgs(struct omap_sham_reqctx *ctx,
+			      struct scatterlist *sg, int bs, int new_len)
 {
-	int ret;
+	int pages;
+	void *buf;
+	int len;
 
-	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
-				       DMA_TO_DEVICE);
-	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
-		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen);
-		return -EINVAL;
+	len = new_len + ctx->bufcnt;
+
+	pages = get_order(ctx->total);
+
+	buf = (void *)__get_free_pages(GFP_ATOMIC, pages);
+	if (!buf) {
+		pr_err("Couldn't allocate pages for unaligned cases.\n");
+		return -ENOMEM;
 	}
 
-	ctx->flags &= ~BIT(FLAGS_SG);
+	if (ctx->bufcnt)
+		memcpy(buf, ctx->dd->xmit_buf, ctx->bufcnt);
 
-	ret = omap_sham_xmit_dma(dd, ctx->dma_addr, length, final, 0);
-	if (ret != -EINPROGRESS)
-		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
-				 DMA_TO_DEVICE);
+	scatterwalk_map_and_copy(buf + ctx->bufcnt, sg, ctx->offset,
+				 ctx->total - ctx->bufcnt, 0);
+	sg_init_table(ctx->sgl, 1);
+	sg_set_buf(ctx->sgl, buf, len);
+	ctx->sg = ctx->sgl;
+	set_bit(FLAGS_SGS_COPIED, &ctx->dd->flags);
+	ctx->sg_len = 1;
+	ctx->bufcnt = 0;
+	ctx->offset = 0;
 
-	return ret;
+	return 0;
 }
 
-static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
+static int omap_sham_align_sgs(struct scatterlist *sg,
+			       int nbytes, int bs, bool final,
+			       struct omap_sham_reqctx *rctx)
 {
-	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-	unsigned int final;
-	size_t count;
+	int n = 0;
+	bool aligned = true;
+	bool list_ok = true;
+	struct scatterlist *sg_tmp = sg;
+	int new_len;
+	int offset = rctx->offset;
 
-	omap_sham_append_sg(ctx);
+	if (!sg || !sg->length || !nbytes)
+		return 0;
+
+	new_len = nbytes;
 
-	final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
+	if (offset)
+		list_ok = false;
 
-	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
-					 ctx->bufcnt, ctx->digcnt, final);
+	if (final)
+		new_len = DIV_ROUND_UP(new_len, bs) * bs;
+	else
+		new_len = new_len / bs * bs;
 
-	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
-		count = ctx->bufcnt;
-		ctx->bufcnt = 0;
-		return omap_sham_xmit_dma_map(dd, ctx, count, final);
+	while (nbytes > 0 && sg_tmp) {
+		n++;
+
+		if (offset < sg_tmp->length) {
+			if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) {
+				aligned = false;
+				break;
+			}
+
+			if (!IS_ALIGNED(sg_tmp->length - offset, bs)) {
+				aligned = false;
+				break;
+			}
+		}
+
+		if (offset) {
+			offset -= sg_tmp->length;
+			if (offset < 0) {
+				nbytes += offset;
+				offset = 0;
+			}
+		} else {
+			nbytes -= sg_tmp->length;
+		}
+
+		sg_tmp = sg_next(sg_tmp);
+
+		if (nbytes < 0) {
+			list_ok = false;
+			break;
+		}
 	}
 
+	if (!aligned)
+		return omap_sham_copy_sgs(rctx, sg, bs, new_len);
+	else if (!list_ok)
+		return omap_sham_copy_sg_lists(rctx, sg, bs, new_len);
+
+	rctx->sg_len = n;
+	rctx->sg = sg;
+
 	return 0;
 }
 
-/* Start address alignment */
-#define SG_AA(sg)	(IS_ALIGNED(sg->offset, sizeof(u32)))
-/* SHA1 block size alignment */
-#define SG_SA(sg, bs)	(IS_ALIGNED(sg->length, bs))
-
-static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
+static int omap_sham_prepare_request(struct ahash_request *req, bool update)
 {
-	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-	unsigned int length, final, tail;
-	struct scatterlist *sg;
-	int ret, bs;
+	struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
+	int bs;
+	int ret;
+	int nbytes;
+	bool final = rctx->flags & BIT(FLAGS_FINUP);
+	int xmit_len, hash_later;
 
-	if (!ctx->total)
+	if (!req)
 		return 0;
 
-	if (ctx->bufcnt || ctx->offset)
-		return omap_sham_update_dma_slow(dd);
-
-	/*
-	 * Don't use the sg interface when the transfer size is less
-	 * than the number of elements in a DMA frame.  Otherwise,
-	 * the dmaengine infrastructure will calculate that it needs
-	 * to transfer 0 frames which ultimately fails.
-	 */
-	if (ctx->total < get_block_size(ctx))
-		return omap_sham_update_dma_slow(dd);
-
-	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
-			ctx->digcnt, ctx->bufcnt, ctx->total);
+	bs = get_block_size(rctx);
 
-	sg = ctx->sg;
-	bs = get_block_size(ctx);
+	if (update)
+		nbytes = req->nbytes;
+	else
+		nbytes = 0;
 
-	if (!SG_AA(sg))
-		return omap_sham_update_dma_slow(dd);
+	rctx->total = nbytes + rctx->bufcnt;
 
-	if (!sg_is_last(sg) && !SG_SA(sg, bs))
-		/* size is not BLOCK_SIZE aligned */
-		return omap_sham_update_dma_slow(dd);
+	if (!rctx->total)
+		return 0;
 
-	length = min(ctx->total, sg->length);
+	if (nbytes && (!IS_ALIGNED(rctx->bufcnt, bs))) {
+		int len = bs - rctx->bufcnt % bs;
 
-	if (sg_is_last(sg)) {
-		if (!(ctx->flags & BIT(FLAGS_FINUP))) {
-			/* not last sg must be BLOCK_SIZE aligned */
-			tail = length & (bs - 1);
-			/* without finup() we need one block to close hash */
-			if (!tail)
-				tail = bs;
-			length -= tail;
-		}
+		if (len > nbytes)
+			len = nbytes;
+		scatterwalk_map_and_copy(rctx->buffer + rctx->bufcnt, req->src,
+					 0, len, 0);
+		rctx->bufcnt += len;
+		nbytes -= len;
+		rctx->offset = len;
 	}
 
-	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
-		dev_err(dd->dev, "dma_map_sg  error\n");
-		return -EINVAL;
-	}
+	if (rctx->bufcnt)
+		memcpy(rctx->dd->xmit_buf, rctx->buffer, rctx->bufcnt);
 
-	ctx->flags |= BIT(FLAGS_SG);
+	ret = omap_sham_align_sgs(req->src, nbytes, bs, final, rctx);
+	if (ret)
+		return ret;
 
-	ctx->total -= length;
-	ctx->offset = length; /* offset where to start slow */
+	xmit_len = rctx->total;
 
-	final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
+	if (!IS_ALIGNED(xmit_len, bs)) {
+		if (final)
+			xmit_len = DIV_ROUND_UP(xmit_len, bs) * bs;
+		else
+			xmit_len = xmit_len / bs * bs;
+	}
 
-	ret = omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final, 1);
-	if (ret != -EINPROGRESS)
-		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+	hash_later = rctx->total - xmit_len;
+	if (hash_later < 0)
+		hash_later = 0;
 
-	return ret;
-}
+	if (rctx->bufcnt && nbytes) {
+		/* have data from previous operation and current */
+		sg_init_table(rctx->sgl, 2);
+		sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, rctx->bufcnt);
 
-static int omap_sham_update_cpu(struct omap_sham_dev *dd)
-{
-	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-	int bufcnt, final;
+		sg_chain(rctx->sgl, 2, req->src);
 
-	if (!ctx->total)
-		return 0;
+		rctx->sg = rctx->sgl;
 
-	omap_sham_append_sg(ctx);
+		rctx->sg_len++;
+	} else if (rctx->bufcnt) {
+		/* have buffered data only */
+		sg_init_table(rctx->sgl, 1);
+		sg_set_buf(rctx->sgl, rctx->dd->xmit_buf, xmit_len);
 
-	final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
+		rctx->sg = rctx->sgl;
 
-	dev_dbg(dd->dev, "cpu: bufcnt: %u, digcnt: %d, final: %d\n",
-		ctx->bufcnt, ctx->digcnt, final);
+		rctx->sg_len = 1;
+	}
 
-	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
-		bufcnt = ctx->bufcnt;
-		ctx->bufcnt = 0;
-		return omap_sham_xmit_cpu(dd, ctx->buffer, bufcnt, final);
+	if (hash_later) {
+		if (req->nbytes) {
+			scatterwalk_map_and_copy(rctx->buffer, req->src,
+						 req->nbytes - hash_later,
+						 hash_later, 0);
+		} else {
+			memcpy(rctx->buffer, rctx->buffer + xmit_len,
+			       hash_later);
+		}
+		rctx->bufcnt = hash_later;
+	} else {
+		rctx->bufcnt = 0;
 	}
 
+	if (!final)
+		rctx->total = xmit_len;
+
 	return 0;
 }
 
@@ -815,18 +896,9 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
 {
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
 
+	dma_unmap_sg(dd->dev, ctx->sg, ctx->sg_len, DMA_TO_DEVICE);
 
-	if (ctx->flags & BIT(FLAGS_SG)) {
-		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
-		if (ctx->sg->length == ctx->offset) {
-			ctx->sg = sg_next(ctx->sg);
-			if (ctx->sg)
-				ctx->offset = 0;
-		}
-	} else {
-		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
-				 DMA_TO_DEVICE);
-	}
+	clear_bit(FLAGS_DMA_ACTIVE, &dd->flags);
 
 	return 0;
 }
@@ -887,6 +959,8 @@ static int omap_sham_init(struct ahash_request *req)
 
 	ctx->bufcnt = 0;
 	ctx->digcnt = 0;
+	ctx->total = 0;
+	ctx->offset = 0;
 	ctx->buflen = BUFLEN;

 	if (tctx->flags & BIT(FLAGS_HMAC)) {
@@ -909,14 +983,19 @@ static int omap_sham_update_req(struct omap_sham_dev *dd)
 	struct ahash_request *req = dd->req;
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
 	int err;
+	bool final = ctx->flags & BIT(FLAGS_FINUP);
 
 	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
 		 ctx->total, ctx->digcnt, (ctx->flags & BIT(FLAGS_FINUP)) != 0);
 
+	if (ctx->total < get_block_size(ctx) ||
+	    ctx->total < OMAP_SHA_DMA_THRESHOLD)
+		ctx->flags |= BIT(FLAGS_CPU);
+
 	if (ctx->flags & BIT(FLAGS_CPU))
-		err = omap_sham_update_cpu(dd);
+		err = omap_sham_xmit_cpu(dd, ctx->total, final);
 	else
-		err = omap_sham_update_dma_start(dd);
+		err = omap_sham_xmit_dma(dd, ctx->total, final);
 
 	/* wait for dma completion before can take more data */
 	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt);
@@ -930,7 +1009,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
 	int err = 0, use_dma = 1;
 
-	if ((ctx->bufcnt <= get_block_size(ctx)) || dd->polling_mode)
+	if ((ctx->total <= get_block_size(ctx)) || dd->polling_mode)
 		/*
 		 * faster to handle last block with cpu or
 		 * use cpu when dma is not present.
@@ -938,9 +1017,9 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
 		use_dma = 0;

 	if (use_dma)
-		err = omap_sham_xmit_dma_map(dd, ctx, ctx->bufcnt, 1);
+		err = omap_sham_xmit_dma(dd, ctx->total, 1);
 	else
-		err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1);
+		err = omap_sham_xmit_cpu(dd, ctx->total, 1);
 
 	ctx->bufcnt = 0;
 
@@ -988,6 +1067,17 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
 	struct omap_sham_dev *dd = ctx->dd;
 
+	if (test_bit(FLAGS_SGS_COPIED, &dd->flags))
+		free_pages((unsigned long)sg_virt(ctx->sg),
+			   get_order(ctx->sg->length));
+
+	if (test_bit(FLAGS_SGS_ALLOCED, &dd->flags))
+		kfree(ctx->sg);
+
+	ctx->sg = NULL;
+
+	dd->flags &= ~(BIT(FLAGS_SGS_ALLOCED) | BIT(FLAGS_SGS_COPIED));
+
 	if (!err) {
 		dd->pdata->copy_hash(req, 1);
 		if (test_bit(FLAGS_FINAL, &dd->flags))
@@ -1005,9 +1095,6 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
 
 	if (req->base.complete)
 		req->base.complete(&req->base, err);
-
-	/* handle new request */
-	tasklet_schedule(&dd->done_task);
 }

 static int omap_sham_handle_queue(struct omap_sham_dev *dd,
@@ -1018,6 +1105,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
 	unsigned long flags;
 	int err = 0, ret = 0;
 
+retry:
 	spin_lock_irqsave(&dd->lock, flags);
 	if (req)
 		ret = ahash_enqueue_request(&dd->queue, req);
@@ -1041,6 +1129,10 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
 	dd->req = req;
 	ctx = ahash_request_ctx(req);
 
+	err = omap_sham_prepare_request(req, ctx->op == OP_UPDATE);
+	if (err)
+		goto err1;
+
 	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
 						ctx->op, req->nbytes);
 
@@ -1061,11 +1153,19 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
 		err = omap_sham_final_req(dd);
 	}
 err1:
-	if (err != -EINPROGRESS)
+	dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+	if (err != -EINPROGRESS) {
 		/* done_task will not finish it, so do it here */
 		omap_sham_finish_req(req, err);
+		req = NULL;
 
-	dev_dbg(dd->dev, "exit, err: %d\n", err);
+		/*
+		 * Execute next request immediately if there is anything
+		 * in queue.
+		 */
+		goto retry;
+	}
 
 	return ret;
 }
@@ -1085,34 +1185,15 @@ static int omap_sham_update(struct ahash_request *req)
 {
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
 	struct omap_sham_dev *dd = ctx->dd;
-	int bs = get_block_size(ctx);
 
 	if (!req->nbytes)
 		return 0;
 
-	ctx->total = req->nbytes;
-	ctx->sg = req->src;
-	ctx->offset = 0;
-
-	if (ctx->flags & BIT(FLAGS_FINUP)) {
-		if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) {
-			/*
-			* OMAP HW accel works only with buffers >= 9
-			* will switch to bypass in final()
-			* final has the same request and data
-			*/
-			omap_sham_append_sg(ctx);
-			return 0;
-		} else if ((ctx->bufcnt + ctx->total <= bs) ||
-			   dd->polling_mode) {
-			/*
-			 * faster to use CPU for short transfers or
-			 * use cpu when dma is not present.
-			 */
-			ctx->flags |= BIT(FLAGS_CPU);
-		}
-	} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
-		omap_sham_append_sg(ctx);
+	if (ctx->total + req->nbytes < ctx->buflen) {
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, req->src,
+					 0, req->nbytes, 0);
+		ctx->bufcnt += req->nbytes;
+		ctx->total += req->nbytes;
 		return 0;
 		return 0;
 	}
@@ -1137,9 +1218,20 @@ static int omap_sham_final_shash(struct ahash_request *req)
 {
 {
 	struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
 	struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+
+	/*
+	 * If we are running HMAC on limited hardware support, skip
+	 * the ipad in the beginning of the buffer if we are going for
+	 * software fallback algorithm.
+	 */
+	if (test_bit(FLAGS_HMAC, &ctx->flags) &&
+	    !test_bit(FLAGS_AUTO_XOR, &ctx->dd->flags))
+		offset = get_block_size(ctx);
 
 	return omap_sham_shash_digest(tctx->fallback, req->base.flags,
-				      ctx->buffer, ctx->bufcnt, req->result);
+				      ctx->buffer + offset,
+				      ctx->bufcnt - offset, req->result);
 }

 static int omap_sham_final(struct ahash_request *req)
@@ -1154,10 +1246,11 @@ static int omap_sham_final(struct ahash_request *req)
 	/*
 	 * OMAP HW accel works only with buffers >= 9.
 	 * HMAC is always >= 9 because ipad == block size.
-	 * If buffersize is less than 240, we use fallback SW encoding,
-	 * as using DMA + HW in this case doesn't provide any benefit.
+	 * If buffersize is less than DMA_THRESHOLD, we use fallback
+	 * SW encoding, as using DMA + HW in this case doesn't provide
+	 * any benefit.
 	 */
-	if ((ctx->digcnt + ctx->bufcnt) < 240)
+	if (!ctx->digcnt && ctx->bufcnt < OMAP_SHA_DMA_THRESHOLD)
 		return omap_sham_final_shash(req);
 	else if (ctx->bufcnt)
 		return omap_sham_enqueue(req, OP_FINAL);
@@ -1323,6 +1416,25 @@ static void omap_sham_cra_exit(struct crypto_tfm *tfm)
 	}
 }
 
+static int omap_sham_export(struct ahash_request *req, void *out)
+{
+	struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
+
+	memcpy(out, rctx, sizeof(*rctx) + rctx->bufcnt);
+
+	return 0;
+}
+
+static int omap_sham_import(struct ahash_request *req, const void *in)
+{
+	struct omap_sham_reqctx *rctx = ahash_request_ctx(req);
+	const struct omap_sham_reqctx *ctx_in = in;
+
+	memcpy(rctx, in, sizeof(*rctx) + ctx_in->bufcnt);
+
+	return 0;
+}
+
 static struct ahash_alg algs_sha1_md5[] = {
 {
 	.init		= omap_sham_init,
@@ -1341,7 +1453,7 @@ static struct ahash_alg algs_sha1_md5[] = {
 						CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize		= SHA1_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
-		.cra_alignmask		= 0,
+		.cra_alignmask		= OMAP_ALIGN_MASK,
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
@@ -1440,7 +1552,7 @@ static struct ahash_alg algs_sha224_sha256[] = {
 						CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize		= SHA224_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
-		.cra_alignmask		= 0,
+		.cra_alignmask		= OMAP_ALIGN_MASK,
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
@@ -1462,7 +1574,7 @@ static struct ahash_alg algs_sha224_sha256[] = {
 						CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize		= SHA256_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
-		.cra_alignmask		= 0,
+		.cra_alignmask		= OMAP_ALIGN_MASK,
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
@@ -1535,7 +1647,7 @@ static struct ahash_alg algs_sha384_sha512[] = {
 						CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize		= SHA384_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
-		.cra_alignmask		= 0,
+		.cra_alignmask		= OMAP_ALIGN_MASK,
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
@@ -1557,7 +1669,7 @@ static struct ahash_alg algs_sha384_sha512[] = {
 						CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize		= SHA512_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct omap_sham_ctx),
-		.cra_alignmask		= 0,
+		.cra_alignmask		= OMAP_ALIGN_MASK,
 		.cra_module		= THIS_MODULE,
 		.cra_init		= omap_sham_cra_init,
 		.cra_exit		= omap_sham_cra_exit,
@@ -1624,12 +1736,8 @@ static void omap_sham_done_task(unsigned long data)
 	}

 	if (test_bit(FLAGS_CPU, &dd->flags)) {
-		if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) {
-			/* hash or semi-hash ready */
-			err = omap_sham_update_cpu(dd);
-			if (err != -EINPROGRESS)
-				goto finish;
-		}
+		if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags))
+			goto finish;
 	} else if (test_bit(FLAGS_DMA_READY, &dd->flags)) {
 		if (test_and_clear_bit(FLAGS_DMA_ACTIVE, &dd->flags)) {
 			omap_sham_update_dma_stop(dd);
@@ -1641,8 +1749,6 @@ static void omap_sham_done_task(unsigned long data)
 		if (test_and_clear_bit(FLAGS_OUTPUT_READY, &dd->flags)) {
 			/* hash or semi-hash ready */
 			clear_bit(FLAGS_DMA_READY, &dd->flags);
-			err = omap_sham_update_dma_start(dd);
-			if (err != -EINPROGRESS)
 				goto finish;
 		}
 	}
@@ -1653,6 +1759,10 @@ finish:
 	dev_dbg(dd->dev, "update done: err: %d\n", err);
 	/* finish curent request */
 	omap_sham_finish_req(dd->req, err);
+
+	/* If we are not busy, process next req */
+	if (!test_bit(FLAGS_BUSY, &dd->flags))
+		omap_sham_handle_queue(dd, NULL);
 }

 static irqreturn_t omap_sham_irq_common(struct omap_sham_dev *dd)
@@ -1977,8 +2087,14 @@ static int omap_sham_probe(struct platform_device *pdev)
 
 	for (i = 0; i < dd->pdata->algs_info_size; i++) {
 		for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
-			err = crypto_register_ahash(
-					&dd->pdata->algs_info[i].algs_list[j]);
+			struct ahash_alg *alg;
+
+			alg = &dd->pdata->algs_info[i].algs_list[j];
+			alg->export = omap_sham_export;
+			alg->import = omap_sham_import;
+			alg->halg.statesize = sizeof(struct omap_sham_reqctx) +
+					      BUFLEN;
+			err = crypto_register_ahash(alg);
 			if (err)
 				goto err_algs;
 

+ 1 - 1
drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.h

@@ -55,7 +55,7 @@
 #define ADF_C3XXX_MAX_ACCELERATORS 3
 #define ADF_C3XXX_MAX_ACCELENGINES 6
 #define ADF_C3XXX_ACCELERATORS_REG_OFFSET 16
-#define ADF_C3XXX_ACCELERATORS_MASK 0x3
+#define ADF_C3XXX_ACCELERATORS_MASK 0x7
 #define ADF_C3XXX_ACCELENGINES_MASK 0x3F
 #define ADF_C3XXX_ETR_MAX_BANKS 16
 #define ADF_C3XXX_SMIAPF0_MASK_OFFSET (0x3A000 + 0x28)

+ 12 - 8
drivers/crypto/qat/qat_common/adf_admin.c

@@ -146,6 +146,7 @@ struct adf_admin_comms {
 	dma_addr_t phy_addr;
 	dma_addr_t const_tbl_addr;
 	void *virt_addr;
+	void *virt_tbl_addr;
 	void __iomem *mailbox_addr;
 	struct mutex lock;	/* protects adf_admin_comms struct */
 };
@@ -251,17 +252,19 @@ int adf_init_admin_comms(struct adf_accel_dev *accel_dev)
 		return -ENOMEM;
 	}
 
-	admin->const_tbl_addr = dma_map_single(&GET_DEV(accel_dev),
-					       (void *) const_tab, 1024,
-					       DMA_TO_DEVICE);
-
-	if (unlikely(dma_mapping_error(&GET_DEV(accel_dev),
-				       admin->const_tbl_addr))) {
+	admin->virt_tbl_addr = dma_zalloc_coherent(&GET_DEV(accel_dev),
+						   PAGE_SIZE,
+						   &admin->const_tbl_addr,
+						   GFP_KERNEL);
+	if (!admin->virt_tbl_addr) {
+		dev_err(&GET_DEV(accel_dev), "Failed to allocate const_tbl\n");
 		dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
 				  admin->virt_addr, admin->phy_addr);
 		kfree(admin);
 		return -ENOMEM;
 	}
+
+	memcpy(admin->virt_tbl_addr, const_tab, sizeof(const_tab));
 	reg_val = (u64)admin->phy_addr;
 	ADF_CSR_WR(csr, ADF_DH895XCC_ADMINMSGUR_OFFSET, reg_val >> 32);
 	ADF_CSR_WR(csr, ADF_DH895XCC_ADMINMSGLR_OFFSET, reg_val);
@@ -282,9 +285,10 @@ void adf_exit_admin_comms(struct adf_accel_dev *accel_dev)
 	if (admin->virt_addr)
 		dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
 				  admin->virt_addr, admin->phy_addr);
+	if (admin->virt_tbl_addr)
+		dma_free_coherent(&GET_DEV(accel_dev), PAGE_SIZE,
+				  admin->virt_tbl_addr, admin->const_tbl_addr);
 
-	dma_unmap_single(&GET_DEV(accel_dev), admin->const_tbl_addr, 1024,
-			 DMA_TO_DEVICE);
 	mutex_destroy(&admin->lock);
 	kfree(admin);
 	accel_dev->admin = NULL;

+ 4 - 4
drivers/crypto/qat/qat_common/qat_uclo.c

@@ -967,10 +967,6 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
 	struct icp_qat_uclo_objhandle *obj_handle = handle->obj_handle;
 	unsigned int ae;
 
-	obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(uint64_t),
-					GFP_KERNEL);
-	if (!obj_handle->uword_buf)
-		return -ENOMEM;
 	obj_handle->encap_uof_obj.beg_uof = obj_handle->obj_hdr->file_buff;
 	obj_handle->encap_uof_obj.obj_hdr = (struct icp_qat_uof_objhdr *)
 					     obj_handle->obj_hdr->file_buff;
@@ -982,6 +978,10 @@ static int qat_uclo_parse_uof_obj(struct icp_qat_fw_loader_handle *handle)
 		pr_err("QAT: UOF incompatible\n");
 		return -EINVAL;
 	}
+	obj_handle->uword_buf = kcalloc(UWORD_CPYBUF_SIZE, sizeof(uint64_t),
+					GFP_KERNEL);
+	if (!obj_handle->uword_buf)
+		return -ENOMEM;
 	obj_handle->ustore_phy_size = ICP_QAT_UCLO_MAX_USTORE;
 	if (!obj_handle->obj_hdr->file_buff ||
 	    !qat_uclo_map_str_table(obj_handle->obj_hdr, ICP_QAT_UOF_STRT,

+ 2 - 4
drivers/crypto/rockchip/rk3288_crypto.c

@@ -304,11 +304,9 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	usleep_range(10, 20);
 	reset_control_deassert(crypto_info->rst);
 
-	err = devm_add_action(dev, rk_crypto_action, crypto_info);
-	if (err) {
-		reset_control_assert(crypto_info->rst);
+	err = devm_add_action_or_reset(dev, rk_crypto_action, crypto_info);
+	if (err)
 		goto err_crypto;
-	}
 
 	spin_lock_init(&crypto_info->lock);
 

+ 4 - 2
drivers/crypto/sunxi-ss/sun4i-ss-cipher.c

@@ -29,7 +29,8 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
 	u32 tx_cnt = 0;
 	u32 spaces;
 	u32 v;
-	int i, err = 0;
+	int err = 0;
+	unsigned int i;
 	unsigned int ileft = areq->nbytes;
 	unsigned int oleft = areq->nbytes;
 	unsigned int todo;
@@ -139,7 +140,8 @@ static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
 	u32 tx_cnt = 0;
 	u32 v;
 	u32 spaces;
-	int i, err = 0;
+	int err = 0;
+	unsigned int i;
 	unsigned int ileft = areq->nbytes;
 	unsigned int oleft = areq->nbytes;
 	unsigned int todo;

+ 34 - 34
drivers/crypto/sunxi-ss/sun4i-ss-core.c

@@ -172,45 +172,45 @@ static struct sun4i_ss_alg_template ss_algs[] = {
 },
 {       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
 	.alg.crypto = {
-			.cra_name = "cbc(des3_ede)",
-			.cra_driver_name = "cbc-des3-sun4i-ss",
-			.cra_priority = 300,
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-			.cra_ctxsize = sizeof(struct sun4i_req_ctx),
-			.cra_module = THIS_MODULE,
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = sun4i_ss_cipher_init,
-			.cra_u.ablkcipher = {
-				.min_keysize    = DES3_EDE_KEY_SIZE,
-				.max_keysize    = DES3_EDE_KEY_SIZE,
-				.ivsize         = DES3_EDE_BLOCK_SIZE,
-				.setkey         = sun4i_ss_des3_setkey,
-				.encrypt        = sun4i_ss_cbc_des3_encrypt,
-				.decrypt        = sun4i_ss_cbc_des3_decrypt,
+		.cra_name = "cbc(des3_ede)",
+		.cra_driver_name = "cbc-des3-sun4i-ss",
+		.cra_priority = 300,
+		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.cra_ctxsize = sizeof(struct sun4i_req_ctx),
+		.cra_module = THIS_MODULE,
+		.cra_alignmask = 3,
+		.cra_type = &crypto_ablkcipher_type,
+		.cra_init = sun4i_ss_cipher_init,
+		.cra_u.ablkcipher = {
+			.min_keysize    = DES3_EDE_KEY_SIZE,
+			.max_keysize    = DES3_EDE_KEY_SIZE,
+			.ivsize         = DES3_EDE_BLOCK_SIZE,
+			.setkey         = sun4i_ss_des3_setkey,
+			.encrypt        = sun4i_ss_cbc_des3_encrypt,
+			.decrypt        = sun4i_ss_cbc_des3_decrypt,
 		}
 	}
 },
 {       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
 	.alg.crypto = {
-			.cra_name = "ecb(des3_ede)",
-			.cra_driver_name = "ecb-des3-sun4i-ss",
-			.cra_priority = 300,
-			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
-			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
-			.cra_ctxsize = sizeof(struct sun4i_req_ctx),
-			.cra_module = THIS_MODULE,
-			.cra_alignmask = 3,
-			.cra_type = &crypto_ablkcipher_type,
-			.cra_init = sun4i_ss_cipher_init,
-			.cra_u.ablkcipher = {
-				.min_keysize    = DES3_EDE_KEY_SIZE,
-				.max_keysize    = DES3_EDE_KEY_SIZE,
-				.ivsize         = DES3_EDE_BLOCK_SIZE,
-				.setkey         = sun4i_ss_des3_setkey,
-				.encrypt        = sun4i_ss_ecb_des3_encrypt,
-				.decrypt        = sun4i_ss_ecb_des3_decrypt,
+		.cra_name = "ecb(des3_ede)",
+		.cra_driver_name = "ecb-des3-sun4i-ss",
+		.cra_priority = 300,
+		.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+		.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.cra_ctxsize = sizeof(struct sun4i_req_ctx),
+		.cra_module = THIS_MODULE,
+		.cra_alignmask = 3,
+		.cra_type = &crypto_ablkcipher_type,
+		.cra_init = sun4i_ss_cipher_init,
+		.cra_u.ablkcipher = {
+			.min_keysize    = DES3_EDE_KEY_SIZE,
+			.max_keysize    = DES3_EDE_KEY_SIZE,
+			.ivsize         = DES3_EDE_BLOCK_SIZE,
+			.setkey         = sun4i_ss_des3_setkey,
+			.encrypt        = sun4i_ss_ecb_des3_encrypt,
+			.decrypt        = sun4i_ss_ecb_des3_decrypt,
 		}
 	}
 },

+ 97 - 68
drivers/crypto/sunxi-ss/sun4i-ss-hash.c

@@ -20,6 +20,15 @@
 
 int sun4i_hash_crainit(struct crypto_tfm *tfm)
 {
+	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
+	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+	struct sun4i_ss_alg_template *algt;
+
+	memset(op, 0, sizeof(struct sun4i_tfm_ctx));
+
+	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
+	op->ss = algt->ss;
+
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct sun4i_req_ctx));
 	return 0;
@@ -32,13 +41,10 @@ int sun4i_hash_init(struct ahash_request *areq)
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
 	struct sun4i_ss_alg_template *algt;
-	struct sun4i_ss_ctx *ss;
 
 	memset(op, 0, sizeof(struct sun4i_req_ctx));

 	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
-	ss = algt->ss;
-	op->ss = algt->ss;
 	op->mode = algt->mode;

 	return 0;
@@ -129,6 +135,9 @@ int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
 	return 0;
 }
 
+#define SS_HASH_UPDATE 1
+#define SS_HASH_FINAL 2
+
 /*
  * sun4i_hash_update: update hash engine
  *
@@ -156,7 +165,7 @@ int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
  * write remaining data in op->buf
  * final state op->len=56
  */
-int sun4i_hash_update(struct ahash_request *areq)
+static int sun4i_hash(struct ahash_request *areq)
 {
 	u32 v, ivmode = 0;
 	unsigned int i = 0;
@@ -167,8 +176,9 @@ int sun4i_hash_update(struct ahash_request *areq)
 	 */

 	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
-	struct sun4i_ss_ctx *ss = op->ss;
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
+	struct sun4i_ss_ctx *ss = tfmctx->ss;
 	unsigned int in_i = 0; /* advancement in the current SG */
 	unsigned int end;
 	/*
@@ -180,22 +190,30 @@ int sun4i_hash_update(struct ahash_request *areq)
 	u32 spaces, rx_cnt = SS_RX_DEFAULT;
 	size_t copied = 0;
 	struct sg_mapping_iter mi;
+	unsigned int j = 0;
+	int zeros;
+	unsigned int index, padlen;
+	__be64 bits;
+	u32 bf[32];
+	u32 wb = 0;
+	unsigned int nwait, nbw = 0;
+	struct scatterlist *in_sg = areq->src;
 
 	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
 		__func__, crypto_tfm_alg_name(areq->base.tfm),
 		op->byte_count, areq->nbytes, op->mode,
 		op->len, op->hash[0]);
 
-	if (areq->nbytes == 0)
+	if (unlikely(areq->nbytes == 0) && (op->flags & SS_HASH_FINAL) == 0)
 		return 0;

 	/* protect against overflow */
-	if (areq->nbytes > UINT_MAX - op->len) {
+	if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
 		dev_err(ss->dev, "Cannot process too large request\n");
 		return -EINVAL;
 	}
 
-	if (op->len + areq->nbytes < 64) {
+	if (op->len + areq->nbytes < 64 && (op->flags & SS_HASH_FINAL) == 0) {
 		/* linearize data to op->buf */
 		copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
 					    op->buf + op->len, areq->nbytes, 0);
@@ -203,14 +221,6 @@ int sun4i_hash_update(struct ahash_request *areq)
 		return 0;
 	}
 
-	end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
-
-	if (end > areq->nbytes || areq->nbytes - end > 63) {
-		dev_err(ss->dev, "ERROR: Bound error %u %u\n",
-			end, areq->nbytes);
-		return -EINVAL;
-	}
-
 	spin_lock_bh(&ss->slock);

 	/*
@@ -225,6 +235,34 @@ int sun4i_hash_update(struct ahash_request *areq)
 	/* Enable the device */
 	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
 
+	if ((op->flags & SS_HASH_UPDATE) == 0)
+		goto hash_final;
+
+	/* start of handling data */
+	if ((op->flags & SS_HASH_FINAL) == 0) {
+		end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
+
+		if (end > areq->nbytes || areq->nbytes - end > 63) {
+			dev_err(ss->dev, "ERROR: Bound error %u %u\n",
+				end, areq->nbytes);
+			err = -EINVAL;
+			goto release_ss;
+		}
+	} else {
+		/* Since we have the flag final, we can go up to modulo 4 */
+		end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
+	}
+
+	/* TODO if SGlen % 4 and op->len == 0 then DMA */
+	i = 1;
+	while (in_sg && i == 1) {
+		if ((in_sg->length % 4) != 0)
+			i = 0;
+		in_sg = sg_next(in_sg);
+	}
+	if (i == 1 && op->len == 0)
+		dev_dbg(ss->dev, "We can DMA\n");
+
 	i = 0;
 	sg_miter_start(&mi, areq->src, sg_nents(areq->src),
 		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
@@ -285,7 +323,11 @@ int sun4i_hash_update(struct ahash_request *areq)
 			}
 		}
 	} while (i < end);
-	/* final linear */
+
+	/*
+	 * Now we have written to the device all that we can,
+	 * store the remaining bytes in op->buf
+	 */
 	if ((areq->nbytes - i) < 64) {
 		while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
 			/* how many bytes we can read from current SG */
@@ -304,13 +346,21 @@ int sun4i_hash_update(struct ahash_request *areq)
 
 	sg_miter_stop(&mi);
 
+	/*
+	 * End of data process
+	 * Now if we have the flag final go to finalize part
+	 * If not, store the partial hash
+	 */
+	if ((op->flags & SS_HASH_FINAL) > 0)
+		goto hash_final;
+
 	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
 	i = 0;
 	do {
 		v = readl(ss->base + SS_CTL);
 		i++;
 	} while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
-	if (i >= SS_TIMEOUT) {
+	if (unlikely(i >= SS_TIMEOUT)) {
 		dev_err_ratelimited(ss->dev,
 				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
 				    i, SS_TIMEOUT, v, areq->nbytes);
@@ -318,56 +368,24 @@ int sun4i_hash_update(struct ahash_request *areq)
 		goto release_ss;
 	}
 
-	/* get the partial hash only if something was written */
 	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
 		op->hash[i] = readl(ss->base + SS_MD0 + i * 4);
 
-release_ss:
-	writel(0, ss->base + SS_CTL);
-	spin_unlock_bh(&ss->slock);
-	return err;
-}
+	goto release_ss;
 
 /*
- * sun4i_hash_final: finalize hashing operation
+ * hash_final: finalize hashing operation
  *
  * If we have some remaining bytes, we write them.
  * Then ask the SS for finalizing the hashing operation
  *
  * I do not check RX FIFO size in this function since the size is 32
  * after each enabling and this function neither write more than 32 words.
+ * If we come from the update part, we cannot have more than
+ * 3 remaining bytes to write and SS is fast enough to not care about it.
  */
-int sun4i_hash_final(struct ahash_request *areq)
-{
-	u32 v, ivmode = 0;
-	unsigned int i;
-	unsigned int j = 0;
-	int zeros, err = 0;
-	unsigned int index, padlen;
-	__be64 bits;
-	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
-	struct sun4i_ss_ctx *ss = op->ss;
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	u32 bf[32];
-	u32 wb = 0;
-	unsigned int nwait, nbw = 0;
-
-	dev_dbg(ss->dev, "%s: byte=%llu len=%u mode=%x wl=%u h=%x",
-		__func__, op->byte_count, areq->nbytes, op->mode,
-		op->len, op->hash[0]);
 
 
-	spin_lock_bh(&ss->slock);
-
-	/*
-	 * if we have already written something,
-	 * restore the partial hash state
-	 */
-	if (op->byte_count > 0) {
-		ivmode = SS_IV_ARBITRARY;
-		for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
-			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
-	}
-	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
+hash_final:
 
 	/* write the remaining words of the wait buffer */
 	if (op->len > 0) {
@@ -428,7 +446,7 @@ int sun4i_hash_final(struct ahash_request *areq)
 
 	/*
 	 * Wait for SS to finish the hash.
-	 * The timeout could happen only in case of bad overcloking
+	 * The timeout could happen only in case of bad overclocking
 	 * or driver bug.
 	 */
 	i = 0;
@@ -436,7 +454,7 @@ int sun4i_hash_final(struct ahash_request *areq)
 		v = readl(ss->base + SS_CTL);
 		i++;
 	} while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
-	if (i >= SS_TIMEOUT) {
+	if (unlikely(i >= SS_TIMEOUT)) {
 		dev_err_ratelimited(ss->dev,
 				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
 				    i, SS_TIMEOUT, v, areq->nbytes);
@@ -463,30 +481,41 @@ release_ss:
 	return err;
 }
 
+int sun4i_hash_final(struct ahash_request *areq)
+{
+	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+
+	op->flags = SS_HASH_FINAL;
+	return sun4i_hash(areq);
+}
+
+int sun4i_hash_update(struct ahash_request *areq)
+{
+	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+
+	op->flags = SS_HASH_UPDATE;
+	return sun4i_hash(areq);
+}
+
 /* sun4i_hash_finup: finalize hashing operation after an update */
 int sun4i_hash_finup(struct ahash_request *areq)
 {
-	int err;
-
-	err = sun4i_hash_update(areq);
-	if (err != 0)
-		return err;
+	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 
 
-	return sun4i_hash_final(areq);
+	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
+	return sun4i_hash(areq);
 }
 }
 
 
 /* combo of init/update/final functions */
 /* combo of init/update/final functions */
 int sun4i_hash_digest(struct ahash_request *areq)
 int sun4i_hash_digest(struct ahash_request *areq)
 {
 {
 	int err;
 	int err;
+	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 
 
 	err = sun4i_hash_init(areq);
 	err = sun4i_hash_init(areq);
 	if (err != 0)
 	if (err != 0)
 		return err;
 		return err;
 
 
-	err = sun4i_hash_update(areq);
-	if (err != 0)
-		return err;
-
-	return sun4i_hash_final(areq);
+	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
+	return sun4i_hash(areq);
 }
 }
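
With this rework every exported entry point merely records which phases were requested in op->flags and defers to a single sun4i_hash() worker. A minimal sketch of that dispatch pattern, for orientation only; the helper names below are hypothetical and the real worker also handles the SS locking and IV save/restore visible in the hunks above:

/* Illustrative only -- not the driver's actual sun4i_hash() body. */
static int sun4i_hash_sketch(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	int err = 0;

	if (op->flags & SS_HASH_UPDATE)
		err = feed_pending_data(areq);		/* hypothetical helper */
	if (!err && (op->flags & SS_HASH_FINAL))
		err = write_padding_and_read_digest(areq); /* hypothetical helper */

	return err;
}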

+ 1 - 1
drivers/crypto/sunxi-ss/sun4i-ss.h

@@ -163,7 +163,7 @@ struct sun4i_req_ctx {
 	u32 hash[5]; /* for storing SS_IVx register */
 	char buf[64];
 	unsigned int len;
-	struct sun4i_ss_ctx *ss;
+	int flags;
 };
 
 int sun4i_hash_crainit(struct crypto_tfm *tfm);

+ 1 - 0
drivers/crypto/vmx/Kconfig

@@ -1,6 +1,7 @@
 config CRYPTO_DEV_VMX_ENCRYPT
 	tristate "Encryption acceleration support on P8 CPU"
 	depends on CRYPTO_DEV_VMX
+	select CRYPTO_GHASH
 	default m
 	help
 	  Support for VMX cryptographic acceleration instructions on Power8 CPU.

+ 16 - 15
drivers/crypto/vmx/ghash.c

@@ -26,16 +26,13 @@
 #include <linux/hardirq.h>
 #include <asm/switch_to.h>
 #include <crypto/aes.h>
+#include <crypto/ghash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/hash.h>
 #include <crypto/b128ops.h>
 
 #define IN_INTERRUPT in_interrupt()
 
-#define GHASH_BLOCK_SIZE (16)
-#define GHASH_DIGEST_SIZE (16)
-#define GHASH_KEY_LEN (16)
-
 void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
 void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
 void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
@@ -55,16 +52,11 @@ struct p8_ghash_desc_ctx {
 
 static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
 {
-	const char *alg;
+	const char *alg = "ghash-generic";
 	struct crypto_shash *fallback;
 	struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm);
 	struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	if (!(alg = crypto_tfm_alg_name(tfm))) {
-		printk(KERN_ERR "Failed to get algorithm name.\n");
-		return -ENOENT;
-	}
-
 	fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(fallback)) {
 		printk(KERN_ERR
@@ -78,10 +70,18 @@ static int p8_ghash_init_tfm(struct crypto_tfm *tfm)
 	crypto_shash_set_flags(fallback,
 			       crypto_shash_get_flags((struct crypto_shash
 						       *) tfm));
-	ctx->fallback = fallback;
 
-	shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx)
-	    + crypto_shash_descsize(fallback);
+	/* Check if the descsize defined in the algorithm is still enough. */
+	if (shash_tfm->descsize < sizeof(struct p8_ghash_desc_ctx)
+	    + crypto_shash_descsize(fallback)) {
+		printk(KERN_ERR
+		       "Desc size of the fallback implementation (%s) does not match the expected value: %lu vs %u\n",
+		       alg,
+		       shash_tfm->descsize - sizeof(struct p8_ghash_desc_ctx),
+		       crypto_shash_descsize(fallback));
+		return -EINVAL;
+	}
+	ctx->fallback = fallback;
 
 	return 0;
 }
@@ -113,7 +113,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
 {
 	struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
 
-	if (keylen != GHASH_KEY_LEN)
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
 
 	preempt_disable();
@@ -211,7 +211,8 @@ struct shash_alg p8_ghash_alg = {
 	.update = p8_ghash_update,
 	.final = p8_ghash_final,
 	.setkey = p8_ghash_setkey,
-	.descsize = sizeof(struct p8_ghash_desc_ctx),
+	.descsize = sizeof(struct p8_ghash_desc_ctx)
+		+ sizeof(struct ghash_desc_ctx),
 	.base = {
 		 .cra_name = "ghash",
 		 .cra_driver_name = "p8_ghash",
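
The new descsize arithmetic encodes a simple layout contract: the descriptor memory reserved at registration time must hold p8_ghash's own state followed by the fallback's ghash_desc_ctx, and p8_ghash_init_tfm() now re-checks at runtime that crypto_shash_descsize(fallback) still fits. A rough illustration of that accounting; the field names here are a sketch, not the exact p8_ghash structure:

/* Hypothetical wrapper descriptor layout -- for illustration only. */
struct wrapper_desc_ctx {
	u8 buffer[GHASH_DIGEST_SIZE];	/* bytes queued for the next block */
	u32 bytes;			/* how many of those bytes are valid */
	struct shash_desc fallback_desc;/* header handed to the fallback */
	/*
	 * The extra sizeof(struct ghash_desc_ctx) declared in .descsize
	 * lives here, right after fallback_desc, so the ghash-generic
	 * fallback can keep its own state in the same allocation.
	 */
};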

+ 11 - 0
drivers/pci/quirks.c

@@ -834,6 +834,17 @@ static void quirk_amd_ioapic(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD,	PCI_DEVICE_ID_AMD_VIPER_7410,	quirk_amd_ioapic);
 #endif /* CONFIG_X86_IO_APIC */
 
+#if defined(CONFIG_ARM64) && defined(CONFIG_PCI_ATS)
+
+static void quirk_cavium_sriov_rnm_link(struct pci_dev *dev)
+{
+	/* Fix for improper SRIOV configuration on Cavium cn88xx  RNM device */
+	if (dev->subsystem_device == 0xa118)
+		dev->sriov->link = dev->devfn;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CAVIUM, 0xa018, quirk_cavium_sriov_rnm_link);
+#endif
+
 /*
  * Some settings of MMRBC can lead to data corruption so block changes.
  * See AMD 8131 HyperTransport PCI-X Tunnel Revision Guide

+ 0 - 70
include/crypto/algapi.h

@@ -15,7 +15,6 @@
 #include <linux/crypto.h>
 #include <linux/list.h>
 #include <linux/kernel.h>
-#include <linux/kthread.h>
 #include <linux/skbuff.h>
 
 struct crypto_aead;
@@ -129,75 +128,6 @@ struct ablkcipher_walk {
 	unsigned int		blocksize;
 };
 
-#define ENGINE_NAME_LEN	30
-/*
- * struct crypto_engine - crypto hardware engine
- * @name: the engine name
- * @idling: the engine is entering idle state
- * @busy: request pump is busy
- * @running: the engine is on working
- * @cur_req_prepared: current request is prepared
- * @list: link with the global crypto engine list
- * @queue_lock: spinlock to syncronise access to request queue
- * @queue: the crypto queue of the engine
- * @rt: whether this queue is set to run as a realtime task
- * @prepare_crypt_hardware: a request will soon arrive from the queue
- * so the subsystem requests the driver to prepare the hardware
- * by issuing this call
- * @unprepare_crypt_hardware: there are currently no more requests on the
- * queue so the subsystem notifies the driver that it may relax the
- * hardware by issuing this call
- * @prepare_request: do some prepare if need before handle the current request
- * @unprepare_request: undo any work done by prepare_message()
- * @crypt_one_request: do encryption for current request
- * @kworker: thread struct for request pump
- * @kworker_task: pointer to task for request pump kworker thread
- * @pump_requests: work struct for scheduling work to the request pump
- * @priv_data: the engine private data
- * @cur_req: the current request which is on processing
- */
-struct crypto_engine {
-	char			name[ENGINE_NAME_LEN];
-	bool			idling;
-	bool			busy;
-	bool			running;
-	bool			cur_req_prepared;
-
-	struct list_head	list;
-	spinlock_t		queue_lock;
-	struct crypto_queue	queue;
-
-	bool			rt;
-
-	int (*prepare_crypt_hardware)(struct crypto_engine *engine);
-	int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
-
-	int (*prepare_request)(struct crypto_engine *engine,
-			       struct ablkcipher_request *req);
-	int (*unprepare_request)(struct crypto_engine *engine,
-				 struct ablkcipher_request *req);
-	int (*crypt_one_request)(struct crypto_engine *engine,
-				 struct ablkcipher_request *req);
-
-	struct kthread_worker           kworker;
-	struct task_struct              *kworker_task;
-	struct kthread_work             pump_requests;
-
-	void				*priv_data;
-	struct ablkcipher_request	*cur_req;
-};
-
-int crypto_transfer_request(struct crypto_engine *engine,
-			    struct ablkcipher_request *req, bool need_pump);
-int crypto_transfer_request_to_engine(struct crypto_engine *engine,
-				      struct ablkcipher_request *req);
-void crypto_finalize_request(struct crypto_engine *engine,
-			     struct ablkcipher_request *req, int err);
-int crypto_engine_start(struct crypto_engine *engine);
-int crypto_engine_stop(struct crypto_engine *engine);
-struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
-int crypto_engine_exit(struct crypto_engine *engine);
-
 extern const struct crypto_type crypto_ablkcipher_type;
 extern const struct crypto_type crypto_blkcipher_type;
 

+ 107 - 0
include/crypto/engine.h

@@ -0,0 +1,107 @@
+/*
+ * Crypto engine API
+ *
+ * Copyright (c) 2016 Baolin Wang <baolin.wang@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ENGINE_H
+#define _CRYPTO_ENGINE_H
+
+#include <linux/crypto.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+
+#define ENGINE_NAME_LEN	30
+/*
+ * struct crypto_engine - crypto hardware engine
+ * @name: the engine name
+ * @idling: the engine is entering idle state
+ * @busy: request pump is busy
+ * @running: the engine is on working
+ * @cur_req_prepared: current request is prepared
+ * @list: link with the global crypto engine list
+ * @queue_lock: spinlock to syncronise access to request queue
+ * @queue: the crypto queue of the engine
+ * @rt: whether this queue is set to run as a realtime task
+ * @prepare_crypt_hardware: a request will soon arrive from the queue
+ * so the subsystem requests the driver to prepare the hardware
+ * by issuing this call
+ * @unprepare_crypt_hardware: there are currently no more requests on the
+ * queue so the subsystem notifies the driver that it may relax the
+ * hardware by issuing this call
+ * @prepare_cipher_request: do some prepare if need before handle the current request
+ * @unprepare_cipher_request: undo any work done by prepare_cipher_request()
+ * @cipher_one_request: do encryption for current request
+ * @prepare_hash_request: do some prepare if need before handle the current request
+ * @unprepare_hash_request: undo any work done by prepare_hash_request()
+ * @hash_one_request: do hash for current request
+ * @kworker: thread struct for request pump
+ * @kworker_task: pointer to task for request pump kworker thread
+ * @pump_requests: work struct for scheduling work to the request pump
+ * @priv_data: the engine private data
+ * @cur_req: the current request which is on processing
+ */
+struct crypto_engine {
+	char			name[ENGINE_NAME_LEN];
+	bool			idling;
+	bool			busy;
+	bool			running;
+	bool			cur_req_prepared;
+
+	struct list_head	list;
+	spinlock_t		queue_lock;
+	struct crypto_queue	queue;
+
+	bool			rt;
+
+	int (*prepare_crypt_hardware)(struct crypto_engine *engine);
+	int (*unprepare_crypt_hardware)(struct crypto_engine *engine);
+
+	int (*prepare_cipher_request)(struct crypto_engine *engine,
+				      struct ablkcipher_request *req);
+	int (*unprepare_cipher_request)(struct crypto_engine *engine,
+					struct ablkcipher_request *req);
+	int (*prepare_hash_request)(struct crypto_engine *engine,
+				    struct ahash_request *req);
+	int (*unprepare_hash_request)(struct crypto_engine *engine,
+				      struct ahash_request *req);
+	int (*cipher_one_request)(struct crypto_engine *engine,
+				  struct ablkcipher_request *req);
+	int (*hash_one_request)(struct crypto_engine *engine,
+				struct ahash_request *req);
+
+	struct kthread_worker           kworker;
+	struct task_struct              *kworker_task;
+	struct kthread_work             pump_requests;
+
+	void				*priv_data;
+	struct crypto_async_request	*cur_req;
+};
+
+int crypto_transfer_cipher_request(struct crypto_engine *engine,
+				   struct ablkcipher_request *req,
+				   bool need_pump);
+int crypto_transfer_cipher_request_to_engine(struct crypto_engine *engine,
+					     struct ablkcipher_request *req);
+int crypto_transfer_hash_request(struct crypto_engine *engine,
+				 struct ahash_request *req, bool need_pump);
+int crypto_transfer_hash_request_to_engine(struct crypto_engine *engine,
+					   struct ahash_request *req);
+void crypto_finalize_cipher_request(struct crypto_engine *engine,
+				    struct ablkcipher_request *req, int err);
+void crypto_finalize_hash_request(struct crypto_engine *engine,
+				  struct ahash_request *req, int err);
+int crypto_engine_start(struct crypto_engine *engine);
+int crypto_engine_stop(struct crypto_engine *engine);
+struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt);
+int crypto_engine_exit(struct crypto_engine *engine);
+
+#endif /* _CRYPTO_ENGINE_H */
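
The header above is all a driver needs to offload hashes through the engine. A minimal usage sketch under assumed names: struct my_dev, my_dev_start_hw() and the cur_hash_req bookkeeping are hypothetical driver-private pieces, and only the engine calls are taken from the API declared above.

#include <linux/interrupt.h>
#include <crypto/engine.h>

struct my_dev {				/* hypothetical driver state */
	struct device *dev;
	struct crypto_engine *engine;
	struct ahash_request *cur_hash_req;
};

static int my_dev_start_hw(struct my_dev *dd, struct ahash_request *req);	/* hypothetical */

static int my_hash_one_request(struct crypto_engine *engine,
			       struct ahash_request *req)
{
	struct my_dev *dd = engine->priv_data;

	dd->cur_hash_req = req;
	/* Program the hardware and return; completion is asynchronous. */
	return my_dev_start_hw(dd, req);
}

static irqreturn_t my_hash_irq(int irq, void *data)
{
	struct my_dev *dd = data;

	/* Hand the finished request back so the engine can pump the next one. */
	crypto_finalize_hash_request(dd->engine, dd->cur_hash_req, 0);
	return IRQ_HANDLED;
}

static int my_engine_setup(struct my_dev *dd)
{
	dd->engine = crypto_engine_alloc_init(dd->dev, true);
	if (!dd->engine)
		return -ENOMEM;

	dd->engine->priv_data = dd;
	dd->engine->hash_one_request = my_hash_one_request;

	return crypto_engine_start(dd->engine);
}

/* The ahash .update/.final/.digest handlers then simply queue work: */
static int my_ahash_enqueue(struct my_dev *dd, struct ahash_request *req)
{
	return crypto_transfer_hash_request_to_engine(dd->engine, req);
}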

+ 23 - 0
include/crypto/ghash.h

@@ -0,0 +1,23 @@
+/*
+ * Common values for GHASH algorithms
+ */
+
+#ifndef __CRYPTO_GHASH_H__
+#define __CRYPTO_GHASH_H__
+
+#include <linux/types.h>
+#include <crypto/gf128mul.h>
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+struct ghash_ctx {
+	struct gf128mul_4k *gf128;
+};
+
+struct ghash_desc_ctx {
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+#endif
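
Both ghash-generic and wrappers such as p8_ghash can now size themselves from this one header instead of carrying private copies of the constants. Roughly, a registration on the generic side looks like the following abridged sketch (not a verbatim copy of ghash-generic.c; the .init/.update/.final/.setkey handlers are omitted):

#include <crypto/ghash.h>
#include <crypto/internal/hash.h>

static struct shash_alg ghash_alg_sketch = {
	.digestsize	= GHASH_DIGEST_SIZE,
	.descsize	= sizeof(struct ghash_desc_ctx),
	.base		= {
		.cra_name	= "ghash",
		.cra_blocksize	= GHASH_BLOCK_SIZE,
		.cra_ctxsize	= sizeof(struct ghash_ctx),
	},
};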

+ 0 - 3
include/linux/ccp.h

@@ -238,9 +238,6 @@ struct ccp_xts_aes_engine {
 };
 
 /***** SHA engine *****/
-#define CCP_SHA_BLOCKSIZE               SHA256_BLOCK_SIZE
-#define CCP_SHA_CTXSIZE                 SHA256_DIGEST_SIZE
-
 /**
  * ccp_sha_type - type of SHA operation
  *

+ 3 - 1
include/linux/hw_random.h

@@ -29,7 +29,9 @@
  *			Returns the number of lower random bytes in "data".
  *			Must not be NULL.    *OBSOLETE*
  * @read:		New API. drivers can fill up to max bytes of data
- *			into the buffer. The buffer is aligned for any type.
+ *			into the buffer. The buffer is aligned for any type
+ *			and max is guaranteed to be >= to that alignment
+ *			(either 4 or 8 depending on architecture).
  * @priv:		Private data, for use by the RNG driver.
  * @quality:		Estimation of true entropy in RNG's bitstream
  *			(per mill).
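
For driver authors, the strengthened guarantee means a ->read implementation can always copy whole words without checking alignment or worrying about a too-small max. A minimal sketch under assumed names; my_hw_data_ready() and MY_RNG_DATA_REG are hypothetical hardware specifics:

#include <linux/hw_random.h>
#include <linux/io.h>

#define MY_RNG_DATA_REG	0x04				/* hypothetical register offset */

static bool my_hw_data_ready(void __iomem *base);	/* hypothetical FIFO check */

static int my_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	void __iomem *base = (void __iomem *)rng->priv;
	size_t copied = 0;

	/* max is at least word-sized and 'data' is suitably aligned. */
	while (copied + sizeof(u32) <= max) {
		if (!my_hw_data_ready(base)) {
			if (!wait || copied)
				break;
			cpu_relax();
			continue;
		}
		*(u32 *)(data + copied) = readl(base + MY_RNG_DATA_REG);
		copied += sizeof(u32);
	}

	return copied;		/* number of random bytes placed in the buffer */
}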