@@ -0,0 +1,2811 @@
+########################################################################
+# Copyright (c) 2013, Intel Corporation
+#
+# This software is available to you under a choice of one of two
+# licenses. You may choose to be licensed under the terms of the GNU
+# General Public License (GPL) Version 2, available from the file
+# COPYING in the main directory of this source tree, or the
+# OpenIB.org BSD license below:
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the
+# distribution.
+#
+# * Neither the name of the Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+#
+# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+##
+## Authors:
+## Erdinc Ozturk <erdinc.ozturk@intel.com>
+## Vinodh Gopal <vinodh.gopal@intel.com>
+## James Guilford <james.guilford@intel.com>
+## Tim Chen <tim.c.chen@linux.intel.com>
+##
+## References:
+## This code was derived and highly optimized from the code described in the paper:
+## Vinodh Gopal et al. Optimized Galois-Counter-Mode Implementation
+## on Intel Architecture Processors. August, 2010
+## The details of the implementation are explained in:
+## Erdinc Ozturk et al. Enabling High-Performance Galois-Counter-Mode
+## on Intel Architecture Processors. October, 2012.
|
|
|
|
+##
|
|
|
|
+## Assumptions:
|
|
|
|
+##
|
|
|
|
+##
|
|
|
|
+##
|
|
|
|
+## iv:
|
|
|
|
+## 0 1 2 3
|
|
|
|
+## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | Salt (From the SA) |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | Initialization Vector |
|
|
|
|
+## | (This is the sequence number from IPSec header) |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | 0x1 |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+##
|
|
|
|
+##
|
|
|
|
+##
|
|
|
|
+## AAD:
|
|
|
|
+## AAD padded to 128 bits with 0
|
|
|
|
+## for example, assume AAD is a u32 vector
|
|
|
|
+##
|
|
|
|
+## if AAD is 8 bytes:
|
|
|
|
+## AAD[3] = {A0, A1};
|
|
|
|
+## padded AAD in xmm register = {A1 A0 0 0}
|
|
|
|
+##
|
|
|
|
+## 0 1 2 3
|
|
|
|
+## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | SPI (A1) |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | 32-bit Sequence Number (A0) |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | 0x0 |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+##
|
|
|
|
+## AAD Format with 32-bit Sequence Number
|
|
|
|
+##
|
|
|
|
+## if AAD is 12 bytes:
|
|
|
|
+## AAD[3] = {A0, A1, A2};
|
|
|
|
+## padded AAD in xmm register = {A2 A1 A0 0}
|
|
|
|
+##
|
|
|
|
+## 0 1 2 3
|
|
|
|
+## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | SPI (A2) |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | 64-bit Extended Sequence Number {A1,A0} |
|
|
|
|
+## | |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+## | 0x0 |
|
|
|
|
+## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
|
|
+##
|
|
|
|
+## AAD Format with 64-bit Extended Sequence Number
|
|
|
|
+##
|
|
|
|
+##
|
|
|
|
+## aadLen:
|
|
|
|
+## from the definition of the spec, aadLen can only be 8 or 12 bytes.
|
|
|
|
+## The code additionally supports an aadLen of 16 bytes.
|
|
|
|
+##
|
|
|
|
+## TLen:
|
|
|
|
+## from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
|
|
|
|
+##
|
|
|
|
+## poly = x^128 + x^127 + x^126 + x^121 + 1
|
|
|
|
+## Throughout the code, one-tab and two-tab indentations are used: one tab
+## is for the GHASH part, two tabs are for the AES part.
|
|
|
|
+##
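+##
+## Reference note (illustrative only, not used by the assembly below): the
+## polynomial above is the bit-reflected form of the GCM field polynomial
+## x^128 + x^7 + x^2 + x + 1, and the per-block GHASH update implemented in
+## this file can be sketched in C-like pseudocode as
+##
+##      X = gf128_mul(X ^ block_i, HashKey);   /* carry-less multiply + reduction */
+##
+## where gf128_mul is a hypothetical 128-bit carry-less multiply reduced
+## modulo the polynomial above.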
|
|
|
|
+
|
|
|
|
+#include <linux/linkage.h>
|
|
|
|
+#include <asm/inst.h>
|
|
|
|
+
|
|
|
|
+.data
|
|
|
|
+.align 16
|
|
|
|
+
|
|
|
|
+POLY: .octa 0xC2000000000000000000000000000001
|
|
|
|
+POLY2: .octa 0xC20000000000000000000001C2000000
|
|
|
|
+TWOONE: .octa 0x00000001000000000000000000000001
|
|
|
|
+
|
|
|
|
+# order of these constants should not change.
|
|
|
|
+# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
|
|
|
|
+
|
|
|
|
+SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
|
|
|
|
+SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
|
|
|
|
+ALL_F: .octa 0xffffffffffffffffffffffffffffffff
|
|
|
|
+ZERO: .octa 0x00000000000000000000000000000000
|
|
|
|
+ONE: .octa 0x00000000000000000000000000000001
|
|
|
|
+ONEf: .octa 0x01000000000000000000000000000000
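+
+# Note: ONE increments the counter while it is held in byte-reflected
+# (shuffled) form; ONEf increments it in its original byte order and is used
+# by the out_order path, which skips the byte swap (valid only while the low
+# counter byte cannot wrap, hence the 255-8 check in the main loop).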
|
|
|
|
+
|
|
|
|
+.text
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+##define the fields of the gcm aes context
|
|
|
|
+#{
|
|
|
|
+# u8 expanded_keys[16*11] store expanded keys
|
|
|
|
+# u8 shifted_hkey_1[16] store HashKey <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here
|
|
|
|
+# u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+# u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+#} gcm_ctx;
|
|
|
|
+
|
|
|
|
+HashKey = 16*11 # store HashKey <<1 mod poly here
|
|
|
|
+HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here
|
|
|
|
+HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here
|
|
|
|
+HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here
|
|
|
|
+HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here
|
|
|
|
+HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here
|
|
|
|
+HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here
|
|
|
|
+HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here
|
|
|
|
+HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
|
|
|
|
+
|
|
|
|
+#define arg1 %rdi
|
|
|
|
+#define arg2 %rsi
|
|
|
|
+#define arg3 %rdx
|
|
|
|
+#define arg4 %rcx
|
|
|
|
+#define arg5 %r8
|
|
|
|
+#define arg6 %r9
|
|
|
|
+#define arg7 STACK_OFFSET+8*1(%r14)
|
|
|
|
+#define arg8 STACK_OFFSET+8*2(%r14)
|
|
|
|
+#define arg9 STACK_OFFSET+8*3(%r14)
|
|
|
|
+
|
|
|
|
+i = 0
|
|
|
|
+j = 0
|
|
|
|
+
|
|
|
|
+out_order = 0
|
|
|
|
+in_order = 1
|
|
|
|
+DEC = 0
|
|
|
|
+ENC = 1
|
|
|
|
+
|
|
|
|
+.macro define_reg r n
|
|
|
|
+reg_\r = %xmm\n
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+.macro setreg
|
|
|
|
+.altmacro
|
|
|
|
+define_reg i %i
|
|
|
|
+define_reg j %j
|
|
|
|
+.noaltmacro
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+# 4 registers are pushed onto the stack before %rsp is saved in %r14, so the
+# stack arguments (arg7-arg9) are read at this offset from %r14
|
|
|
|
+STACK_OFFSET = 8*4
|
|
|
|
+
|
|
|
|
+TMP1 = 16*0 # Temporary storage for AAD
|
|
|
|
+TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
|
|
|
|
+TMP3 = 16*2 # Temporary storage for AES State 3
|
|
|
|
+TMP4 = 16*3 # Temporary storage for AES State 4
|
|
|
|
+TMP5 = 16*4 # Temporary storage for AES State 5
|
|
|
|
+TMP6 = 16*5 # Temporary storage for AES State 6
|
|
|
|
+TMP7 = 16*6 # Temporary storage for AES State 7
|
|
|
|
+TMP8 = 16*7 # Temporary storage for AES State 8
|
|
|
|
+
|
|
|
|
+VARIABLE_OFFSET = 16*8
|
|
|
|
+
|
|
|
|
+################################
|
|
|
|
+# Utility Macros
|
|
|
|
+################################
|
|
|
|
+
|
|
|
|
+# Encryption of a single block
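+# (AES-128 only: an initial whitening XOR with round key 0, nine vaesenc
+#  rounds, and a final vaesenclast, with round keys read from the expanded
+#  key schedule at arg1)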
|
|
|
|
+.macro ENCRYPT_SINGLE_BLOCK XMM0
|
|
|
|
+ vpxor (arg1), \XMM0, \XMM0
|
|
|
|
+ i = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 9
|
|
|
|
+ vaesenc 16*i(arg1), \XMM0, \XMM0
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+ vaesenclast 16*10(arg1), \XMM0, \XMM0
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_AS_AVX
|
|
|
|
+###############################################################################
|
|
|
|
+# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
|
|
|
|
+# Input: A and B (128-bits each, bit-reflected)
|
|
|
|
+# Output: C = A*B*x mod poly, (i.e. >>1 )
|
|
|
|
+# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
|
|
|
|
+# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
|
|
|
|
+###############################################################################
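+# A C-like sketch of the Karatsuba structure used below (illustrative only):
+#       hi  = clmul(a1, b1)
+#       lo  = clmul(a0, b0)
+#       mid = clmul(a1 ^ a0, b1 ^ b0) ^ hi ^ lo
+#       <hi:lo> ^= mid, aligned to the middle 128 bits
+# followed by a two-phase shift-and-xor reduction modulo the polynomial.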
|
|
|
|
+.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \GH, \T2
|
|
|
|
+ vpshufd $0b01001110, \HK, \T3
|
|
|
|
+ vpxor \GH , \T2, \T2 # T2 = (a1+a0)
|
|
|
|
+ vpxor \HK , \T3, \T3 # T3 = (b1+b0)
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1
|
|
|
|
+ vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0)
|
|
|
|
+ vpxor \GH, \T2,\T2
|
|
|
|
+ vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0
|
|
|
|
+
|
|
|
|
+ vpslldq $8, \T2,\T3 # shift-L T3 2 DWs
|
|
|
|
+ vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs
|
|
|
|
+ vpxor \T3, \GH, \GH
|
|
|
|
+ vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK
|
|
|
|
+
|
|
|
|
+ #first phase of the reduction
|
|
|
|
+ vpslld $31, \GH, \T2 # packed right shifting << 31
|
|
|
|
+ vpslld $30, \GH, \T3 # packed right shifting shift << 30
|
|
|
|
+ vpslld $25, \GH, \T4 # packed right shifting shift << 25
|
|
|
|
+
|
|
|
|
+ vpxor \T3, \T2, \T2 # xor the shifted versions
|
|
|
|
+ vpxor \T4, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpsrldq $4, \T2, \T5 # shift-R T5 1 DW
|
|
|
|
+
|
|
|
|
+ vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
|
|
|
|
+ vpxor \T2, \GH, \GH # first phase of the reduction complete
|
|
|
|
+
|
|
|
|
+ #second phase of the reduction
|
|
|
|
+
|
|
|
|
+ vpsrld $1,\GH, \T2 # packed left shifting >> 1
|
|
|
|
+ vpsrld $2,\GH, \T3 # packed left shifting >> 2
|
|
|
|
+ vpsrld $7,\GH, \T4 # packed left shifting >> 7
|
|
|
|
+ vpxor \T3, \T2, \T2 # xor the shifted versions
|
|
|
|
+ vpxor \T4, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T5, \T2, \T2
|
|
|
|
+ vpxor \T2, \GH, \GH
|
|
|
|
+ vpxor \T1, \GH, \GH # the result is in GH
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6
|
|
|
|
+
|
|
|
|
+        # HashKey_i_k holds XORed values of the low and high parts of HashKey_i
|
|
|
|
+ vmovdqa \HK, \T5
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_2_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_3(arg1)
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_3_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_4(arg1)
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_4_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_5(arg1)
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_5_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_6(arg1)
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_6_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_7(arg1)
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_7_k(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_8(arg1)
|
|
|
|
+ vpshufd $0b01001110, \T5, \T1
|
|
|
|
+ vpxor \T5, \T1, \T1
|
|
|
|
+ vmovdqa \T1, HashKey_8_k(arg1)
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+## if a = number of total plaintext bytes
|
|
|
|
+## b = floor(a/16)
|
|
|
|
+## num_initial_blocks = b mod 8
|
|
|
|
+## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
|
|
|
|
+## r10, r11, r12, rax are clobbered
|
|
|
|
+## arg1, arg2, arg3, r14 are used as a pointer only, not modified
|
|
|
|
+
|
|
|
|
+.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
|
|
|
|
+ i = (8-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+
|
|
|
|
+ mov arg6, %r10 # r10 = AAD
|
|
|
|
+ mov arg7, %r12 # r12 = aadLen
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ mov %r12, %r11
|
|
|
|
+
|
|
|
|
+ vpxor reg_i, reg_i, reg_i
|
|
|
|
+_get_AAD_loop\@:
|
|
|
|
+ vmovd (%r10), \T1
|
|
|
|
+ vpslldq $12, \T1, \T1
|
|
|
|
+ vpsrldq $4, reg_i, reg_i
|
|
|
|
+ vpxor \T1, reg_i, reg_i
|
|
|
|
+
|
|
|
|
+ add $4, %r10
|
|
|
|
+ sub $4, %r12
|
|
|
|
+ jg _get_AAD_loop\@
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ cmp $16, %r11
|
|
|
|
+ je _get_AAD_loop2_done\@
|
|
|
|
+ mov $16, %r12
|
|
|
|
+
|
|
|
|
+_get_AAD_loop2\@:
|
|
|
|
+ vpsrldq $4, reg_i, reg_i
|
|
|
|
+ sub $4, %r12
|
|
|
|
+ cmp %r11, %r12
|
|
|
|
+ jg _get_AAD_loop2\@
|
|
|
|
+
|
|
|
|
+_get_AAD_loop2_done\@:
|
|
|
|
+
|
|
|
|
+ #byte-reflect the AAD data
|
|
|
|
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i
|
|
|
|
+
|
|
|
|
+ # initialize the data pointer offset as zero
|
|
|
|
+ xor %r11, %r11
|
|
|
|
+
|
|
|
|
+ # start AES for num_initial_blocks blocks
|
|
|
|
+ mov arg5, %rax # rax = *Y0
|
|
|
|
+ vmovdqu (%rax), \CTR # CTR = Y0
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \CTR, \CTR
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, reg_i
|
|
|
|
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ vmovdqa (arg1), \T_key
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vpxor \T_key, reg_i, reg_i
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ j = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 9
|
|
|
|
+ vmovdqa 16*j(arg1), \T_key
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vaesenc \T_key, reg_i, reg_i
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ j = (j+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqa 16*10(arg1), \T_key
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vaesenclast \T_key, reg_i, reg_i
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vmovdqu (arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, reg_i, reg_i
|
|
|
|
+ vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks
|
|
|
|
+ add $16, %r11
|
|
|
|
+.if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, reg_i
|
|
|
|
+.endif
|
|
|
|
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ i = (8-\num_initial_blocks)
|
|
|
|
+ j = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+ GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6
|
|
|
|
+
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vpxor reg_i, reg_j, reg_j
|
|
|
|
+ GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
|
|
|
|
+ i = (i+1)
|
|
|
|
+ j = (j+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+ # XMM8 has the combined result here
|
|
|
|
+
|
|
|
|
+ vmovdqa \XMM8, TMP1(%rsp)
|
|
|
|
+ vmovdqa \XMM8, \T3
|
|
|
|
+
|
|
|
|
+ cmp $128, %r13
|
|
|
|
+ jl _initial_blocks_done\@ # no need for precomputed constants
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+# HashKey_i_k holds XORed values of the low and high parts of HashKey_i
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM1
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM2
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM3
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM4
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM5
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM6
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM7
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM8
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vmovdqa (arg1), \T_key
|
|
|
|
+ vpxor \T_key, \XMM1, \XMM1
|
|
|
|
+ vpxor \T_key, \XMM2, \XMM2
|
|
|
|
+ vpxor \T_key, \XMM3, \XMM3
|
|
|
|
+ vpxor \T_key, \XMM4, \XMM4
|
|
|
|
+ vpxor \T_key, \XMM5, \XMM5
|
|
|
|
+ vpxor \T_key, \XMM6, \XMM6
|
|
|
|
+ vpxor \T_key, \XMM7, \XMM7
|
|
|
|
+ vpxor \T_key, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ i = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 9 # do 9 rounds
|
|
|
|
+ vmovdqa 16*i(arg1), \T_key
|
|
|
|
+ vaesenc \T_key, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T_key, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T_key, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T_key, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T_key, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T_key, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T_key, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T_key, \XMM8, \XMM8
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqa 16*i(arg1), \T_key
|
|
|
|
+ vaesenclast \T_key, \XMM1, \XMM1
|
|
|
|
+ vaesenclast \T_key, \XMM2, \XMM2
|
|
|
|
+ vaesenclast \T_key, \XMM3, \XMM3
|
|
|
|
+ vaesenclast \T_key, \XMM4, \XMM4
|
|
|
|
+ vaesenclast \T_key, \XMM5, \XMM5
|
|
|
|
+ vaesenclast \T_key, \XMM6, \XMM6
|
|
|
|
+ vaesenclast \T_key, \XMM7, \XMM7
|
|
|
|
+ vaesenclast \T_key, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqu (arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM1, \XMM1
|
|
|
|
+ vmovdqu \XMM1, (arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM1
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*1(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM2, \XMM2
|
|
|
|
+ vmovdqu \XMM2, 16*1(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM2
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*2(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM3, \XMM3
|
|
|
|
+ vmovdqu \XMM3, 16*2(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM3
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*3(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM4, \XMM4
|
|
|
|
+ vmovdqu \XMM4, 16*3(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM4
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*4(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM5, \XMM5
|
|
|
|
+ vmovdqu \XMM5, 16*4(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM5
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*5(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM6, \XMM6
|
|
|
|
+ vmovdqu \XMM6, 16*5(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM6
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*6(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM7, \XMM7
|
|
|
|
+ vmovdqu \XMM7, 16*6(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM7
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*7(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM8, \XMM8
|
|
|
|
+ vmovdqu \XMM8, 16*7(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM8
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ add $128, %r11
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+ vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+
|
|
|
|
+_initial_blocks_done\@:
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+# encrypt 8 blocks at a time
|
|
|
|
+# ghash the 8 previously encrypted ciphertext blocks
|
|
|
|
+# arg1, arg2, arg3 are used as pointers only, not modified
|
|
|
|
+# r11 is the data offset value
|
|
|
|
+.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
|
|
|
|
+
|
|
|
|
+ vmovdqa \XMM1, \T2
|
|
|
|
+ vmovdqa \XMM2, TMP2(%rsp)
|
|
|
|
+ vmovdqa \XMM3, TMP3(%rsp)
|
|
|
|
+ vmovdqa \XMM4, TMP4(%rsp)
|
|
|
|
+ vmovdqa \XMM5, TMP5(%rsp)
|
|
|
|
+ vmovdqa \XMM6, TMP6(%rsp)
|
|
|
|
+ vmovdqa \XMM7, TMP7(%rsp)
|
|
|
|
+ vmovdqa \XMM8, TMP8(%rsp)
|
|
|
|
+
|
|
|
|
+.if \loop_idx == in_order
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT
|
|
|
|
+ vpaddd ONE(%rip), \XMM1, \XMM2
|
|
|
|
+ vpaddd ONE(%rip), \XMM2, \XMM3
|
|
|
|
+ vpaddd ONE(%rip), \XMM3, \XMM4
|
|
|
|
+ vpaddd ONE(%rip), \XMM4, \XMM5
|
|
|
|
+ vpaddd ONE(%rip), \XMM5, \XMM6
|
|
|
|
+ vpaddd ONE(%rip), \XMM6, \XMM7
|
|
|
|
+ vpaddd ONE(%rip), \XMM7, \XMM8
|
|
|
|
+ vmovdqa \XMM8, \CTR
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+.else
|
|
|
|
+ vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT
|
|
|
|
+ vpaddd ONEf(%rip), \XMM1, \XMM2
|
|
|
|
+ vpaddd ONEf(%rip), \XMM2, \XMM3
|
|
|
|
+ vpaddd ONEf(%rip), \XMM3, \XMM4
|
|
|
|
+ vpaddd ONEf(%rip), \XMM4, \XMM5
|
|
|
|
+ vpaddd ONEf(%rip), \XMM5, \XMM6
|
|
|
|
+ vpaddd ONEf(%rip), \XMM6, \XMM7
|
|
|
|
+ vpaddd ONEf(%rip), \XMM7, \XMM8
|
|
|
|
+ vmovdqa \XMM8, \CTR
|
|
|
|
+.endif
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqu (arg1), \T1
|
|
|
|
+ vpxor \T1, \XMM1, \XMM1
|
|
|
|
+ vpxor \T1, \XMM2, \XMM2
|
|
|
|
+ vpxor \T1, \XMM3, \XMM3
|
|
|
|
+ vpxor \T1, \XMM4, \XMM4
|
|
|
|
+ vpxor \T1, \XMM5, \XMM5
|
|
|
|
+ vpxor \T1, \XMM6, \XMM6
|
|
|
|
+ vpxor \T1, \XMM7, \XMM7
|
|
|
|
+ vpxor \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*1(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*2(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_8(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T2, \T6
|
|
|
|
+ vpxor \T2, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_8_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*3(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP2(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_7(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_7_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*4(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP3(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_6(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_6_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*5(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP4(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_5(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_5_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*6(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP5(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_4(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_4_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*7(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP6(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_3(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_3_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*8(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP7(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_2(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_2_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*9(arg1), \T5
|
|
|
|
+ vaesenc \T5, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T5, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T5, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T5, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T5, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T5, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T5, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T5, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP8(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \T1, \T3
|
|
|
|
+ vpxor \T1, \T3, \T3
|
|
|
|
+ vmovdqa HashKey_k(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T3, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+ vpxor \T7, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*10(arg1), \T5
|
|
|
|
+
|
|
|
|
+ i = 0
|
|
|
|
+ j = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 8
|
|
|
|
+ vpxor 16*i(arg3, %r11), \T5, \T2
|
|
|
|
+ .if \ENC_DEC == ENC
|
|
|
|
+ vaesenclast \T2, reg_j, reg_j
|
|
|
|
+ .else
|
|
|
|
+ vaesenclast \T2, reg_j, \T3
|
|
|
|
+ vmovdqu 16*i(arg3, %r11), reg_j
|
|
|
|
+ vmovdqu \T3, 16*i(arg2, %r11)
|
|
|
|
+ .endif
|
|
|
|
+ i = (i+1)
|
|
|
|
+ j = (j+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpslldq $8, \T6, \T3 # shift-L T3 2 DWs
|
|
|
|
+        vpsrldq $8, \T6, \T6                    # shift-R T6 2 DWs
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+ vpxor \T4, \T6, \T6 # accumulate the results in T6:T7
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #first phase of the reduction
|
|
|
|
+ #######################################################################
|
|
|
|
+ vpslld $31, \T7, \T2 # packed right shifting << 31
|
|
|
|
+ vpslld $30, \T7, \T3 # packed right shifting shift << 30
|
|
|
|
+ vpslld $25, \T7, \T4 # packed right shifting shift << 25
|
|
|
|
+
|
|
|
|
+ vpxor \T3, \T2, \T2 # xor the shifted versions
|
|
|
|
+ vpxor \T4, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
|
|
|
|
+
|
|
|
|
+ vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
|
|
|
|
+ vpxor \T2, \T7, \T7 # first phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+ .if \ENC_DEC == ENC
|
|
|
|
+ vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #second phase of the reduction
|
|
|
|
+ vpsrld $1, \T7, \T2 # packed left shifting >> 1
|
|
|
|
+ vpsrld $2, \T7, \T3 # packed left shifting >> 2
|
|
|
|
+ vpsrld $7, \T7, \T4 # packed left shifting >> 7
|
|
|
|
+ vpxor \T3, \T2, \T2 # xor the shifted versions
|
|
|
|
+ vpxor \T4, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T1, \T2, \T2
|
|
|
|
+ vpxor \T2, \T7, \T7
|
|
|
|
+ vpxor \T7, \T6, \T6 # the result is in T6
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpxor \T6, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# GHASH the last 8 ciphertext blocks.
|
|
|
|
+.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
|
|
|
|
+
|
|
|
|
+ ## Karatsuba Method
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM1, \T2
|
|
|
|
+ vpxor \XMM1, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_8(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM1, \T6
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM1, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_8_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM2, \T2
|
|
|
|
+ vpxor \XMM2, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_7(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM2, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM2, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_7_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM3, \T2
|
|
|
|
+ vpxor \XMM3, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_6(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM3, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM3, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_6_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM4, \T2
|
|
|
|
+ vpxor \XMM4, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_5(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM4, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM4, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_5_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM5, \T2
|
|
|
|
+ vpxor \XMM5, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_4(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM5, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM5, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_4_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM6, \T2
|
|
|
|
+ vpxor \XMM6, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_3(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM6, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM6, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_3_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM7, \T2
|
|
|
|
+ vpxor \XMM7, \T2, \T2
|
|
|
|
+ vmovdqa HashKey_2(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM7, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM7, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_2_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM8, \T2
|
|
|
|
+ vpxor \XMM8, \T2, \T2
|
|
|
|
+ vmovdqa HashKey(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM8, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM8, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_k(arg1), \T3
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+ vpxor \T6, \XMM1, \XMM1
|
|
|
|
+ vpxor \T7, \XMM1, \T2
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpslldq $8, \T2, \T4
|
|
|
|
+ vpsrldq $8, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+ vpxor \T2, \T6, \T6 # <T6:T7> holds the result of
|
|
|
|
+ # the accumulated carry-less multiplications
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #first phase of the reduction
|
|
|
|
+ vpslld $31, \T7, \T2 # packed right shifting << 31
|
|
|
|
+ vpslld $30, \T7, \T3 # packed right shifting shift << 30
|
|
|
|
+ vpslld $25, \T7, \T4 # packed right shifting shift << 25
|
|
|
|
+
|
|
|
|
+ vpxor \T3, \T2, \T2 # xor the shifted versions
|
|
|
|
+ vpxor \T4, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpsrldq $4, \T2, \T1 # shift-R T1 1 DW
|
|
|
|
+
|
|
|
|
+ vpslldq $12, \T2, \T2 # shift-L T2 3 DWs
|
|
|
|
+ vpxor \T2, \T7, \T7 # first phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #second phase of the reduction
|
|
|
|
+ vpsrld $1, \T7, \T2 # packed left shifting >> 1
|
|
|
|
+ vpsrld $2, \T7, \T3 # packed left shifting >> 2
|
|
|
|
+ vpsrld $7, \T7, \T4 # packed left shifting >> 7
|
|
|
|
+ vpxor \T3, \T2, \T2 # xor the shifted versions
|
|
|
|
+ vpxor \T4, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T1, \T2, \T2
|
|
|
|
+ vpxor \T2, \T7, \T7
|
|
|
|
+ vpxor \T7, \T6, \T6 # the result is in T6
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# combined for GCM encrypt and decrypt functions
|
|
|
|
+# clobbering all xmm registers
|
|
|
|
+# clobbering r10, r11, r12, r13, r14, r15
|
|
|
|
+.macro GCM_ENC_DEC_AVX ENC_DEC
|
|
|
|
+
|
|
|
|
+        # the number of register pushes below must match STACK_OFFSET (4 pushes = 8*4 bytes)
|
|
|
|
+ push %r12
|
|
|
|
+ push %r13
|
|
|
|
+ push %r14
|
|
|
|
+ push %r15
|
|
|
|
+
|
|
|
|
+ mov %rsp, %r14
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ sub $VARIABLE_OFFSET, %rsp
|
|
|
|
+ and $~63, %rsp # align rsp to 64 bytes
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
|
|
|
|
+
|
|
|
|
+ mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
|
|
|
|
+ and $-16, %r13 # r13 = r13 - (r13 mod 16)
|
|
|
|
+
|
|
|
|
+ mov %r13, %r12
|
|
|
|
+ shr $4, %r12
|
|
|
|
+ and $7, %r12
|
|
|
|
+ jz _initial_num_blocks_is_0\@
|
|
|
|
+
|
|
|
|
+ cmp $7, %r12
|
|
|
|
+ je _initial_num_blocks_is_7\@
|
|
|
|
+ cmp $6, %r12
|
|
|
|
+ je _initial_num_blocks_is_6\@
|
|
|
|
+ cmp $5, %r12
|
|
|
|
+ je _initial_num_blocks_is_5\@
|
|
|
|
+ cmp $4, %r12
|
|
|
|
+ je _initial_num_blocks_is_4\@
|
|
|
|
+ cmp $3, %r12
|
|
|
|
+ je _initial_num_blocks_is_3\@
|
|
|
|
+ cmp $2, %r12
|
|
|
|
+ je _initial_num_blocks_is_2\@
|
|
|
|
+
|
|
|
|
+ jmp _initial_num_blocks_is_1\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_7\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*7, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_6\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*6, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_5\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*5, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_4\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*4, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_3\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*3, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_2\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*2, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_1\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*1, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_0\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_initial_blocks_encrypted\@:
|
|
|
|
+ cmp $0, %r13
|
|
|
|
+ je _zero_cipher_left\@
|
|
|
|
+
|
|
|
|
+ sub $128, %r13
|
|
|
|
+ je _eight_cipher_left\@
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovd %xmm9, %r15d
|
|
|
|
+ and $255, %r15d
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_encrypt_by_8_new\@:
|
|
|
|
+ cmp $(255-8), %r15d
|
|
|
|
+ jg _encrypt_by_8\@
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ add $8, %r15b
|
|
|
|
+ GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
|
|
|
|
+ add $128, %r11
|
|
|
|
+ sub $128, %r13
|
|
|
|
+ jne _encrypt_by_8_new\@
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ jmp _eight_cipher_left\@
|
|
|
|
+
|
|
|
|
+_encrypt_by_8\@:
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ add $8, %r15b
|
|
|
|
+ GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ add $128, %r11
|
|
|
|
+ sub $128, %r13
|
|
|
|
+ jne _encrypt_by_8_new\@
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_eight_cipher_left\@:
|
|
|
|
+ GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_zero_cipher_left\@:
|
|
|
|
+ cmp $16, arg4
|
|
|
|
+ jl _only_less_than_16\@
|
|
|
|
+
|
|
|
|
+ mov arg4, %r13
|
|
|
|
+ and $15, %r13 # r13 = (arg4 mod 16)
|
|
|
|
+
|
|
|
|
+ je _multiple_of_16_bytes\@
|
|
|
|
+
|
|
|
|
+        # handle the last <16 Byte block separately
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
|
|
|
|
+
|
|
|
|
+ sub $16, %r11
|
|
|
|
+ add %r13, %r11
|
|
|
|
+ vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
|
|
|
|
+
|
|
|
|
+ lea SHIFT_MASK+16(%rip), %r12
|
|
|
|
+ sub %r13, %r12 # adjust the shuffle mask pointer to be
|
|
|
|
+ # able to shift 16-r13 bytes (r13 is the
|
|
|
|
+ # number of bytes in plaintext mod 16)
|
|
|
|
+ vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
|
|
|
|
+ vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
|
|
|
|
+ jmp _final_ghash_mul\@
|
|
|
|
+
|
|
|
|
+_only_less_than_16\@:
|
|
|
|
+ # check for 0 length
|
|
|
|
+ mov arg4, %r13
|
|
|
|
+ and $15, %r13 # r13 = (arg4 mod 16)
|
|
|
|
+
|
|
|
|
+ je _multiple_of_16_bytes\@
|
|
|
|
+
|
|
|
|
+        # handle the last <16 Byte block separately
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ lea SHIFT_MASK+16(%rip), %r12
|
|
|
|
+ sub %r13, %r12 # adjust the shuffle mask pointer to be
|
|
|
|
+ # able to shift 16-r13 bytes (r13 is the
|
|
|
|
+ # number of bytes in plaintext mod 16)
|
|
|
|
+
|
|
|
|
+_get_last_16_byte_loop\@:
|
|
|
|
+ movb (arg3, %r11), %al
|
|
|
|
+ movb %al, TMP1 (%rsp , %r11)
|
|
|
|
+ add $1, %r11
|
|
|
|
+ cmp %r13, %r11
|
|
|
|
+ jne _get_last_16_byte_loop\@
|
|
|
|
+
|
|
|
|
+ vmovdqu TMP1(%rsp), %xmm1
|
|
|
|
+
|
|
|
|
+ sub $16, %r11
|
|
|
|
+
|
|
|
|
+_final_ghash_mul\@:
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa %xmm1, %xmm2
|
|
|
|
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
|
|
|
|
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
|
|
|
|
+ # mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpand %xmm1, %xmm2, %xmm2
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
|
|
|
|
+ vpxor %xmm2, %xmm14, %xmm14
|
|
|
|
+ #GHASH computation for the last <16 Byte block
|
|
|
|
+ GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
|
|
|
|
+ sub %r13, %r11
|
|
|
|
+ add $16, %r11
|
|
|
|
+ .else
|
|
|
|
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
|
|
|
|
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
|
|
|
|
+ # mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ vpxor %xmm9, %xmm14, %xmm14
|
|
|
|
+ #GHASH computation for the last <16 Byte block
|
|
|
|
+ GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
|
|
|
|
+ sub %r13, %r11
|
|
|
|
+ add $16, %r11
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #############################
|
|
|
|
+ # output r13 Bytes
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ cmp $8, %r13
|
|
|
|
+ jle _less_than_8_bytes_left\@
|
|
|
|
+
|
|
|
|
+ mov %rax, (arg2 , %r11)
|
|
|
|
+ add $8, %r11
|
|
|
|
+ vpsrldq $8, %xmm9, %xmm9
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ sub $8, %r13
|
|
|
|
+
|
|
|
|
+_less_than_8_bytes_left\@:
|
|
|
|
+ movb %al, (arg2 , %r11)
|
|
|
|
+ add $1, %r11
|
|
|
|
+ shr $8, %rax
|
|
|
|
+ sub $1, %r13
|
|
|
|
+ jne _less_than_8_bytes_left\@
|
|
|
|
+ #############################
|
|
|
|
+
|
|
|
|
+_multiple_of_16_bytes\@:
|
|
|
|
+ mov arg7, %r12 # r12 = aadLen (number of bytes)
|
|
|
|
+ shl $3, %r12 # convert into number of bits
|
|
|
|
+ vmovd %r12d, %xmm15 # len(A) in xmm15
|
|
|
|
+
|
|
|
|
+        shl     $3, arg4                        # len(C) in bits (*8)
|
|
|
|
+ vmovq arg4, %xmm1
|
|
|
|
+ vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
|
|
|
|
+ vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
|
|
|
|
+
|
|
|
|
+ vpxor %xmm15, %xmm14, %xmm14
|
|
|
|
+ GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ mov arg5, %rax # rax = *Y0
|
|
|
|
+ vmovdqu (%rax), %xmm9 # xmm9 = Y0
|
|
|
|
+
|
|
|
|
+ ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
|
|
|
|
+
|
|
|
|
+ vpxor %xmm14, %xmm9, %xmm9
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_return_T\@:
|
|
|
|
+ mov arg8, %r10 # r10 = authTag
|
|
|
|
+ mov arg9, %r11 # r11 = auth_tag_len
|
|
|
|
+
|
|
|
|
+ cmp $16, %r11
|
|
|
|
+ je _T_16\@
|
|
|
|
+
|
|
|
|
+ cmp $12, %r11
|
|
|
|
+ je _T_12\@
|
|
|
|
+
|
|
|
|
+_T_8\@:
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ mov %rax, (%r10)
|
|
|
|
+ jmp _return_T_done\@
|
|
|
|
+_T_12\@:
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ mov %rax, (%r10)
|
|
|
|
+ vpsrldq $8, %xmm9, %xmm9
|
|
|
|
+ vmovd %xmm9, %eax
|
|
|
|
+ mov %eax, 8(%r10)
|
|
|
|
+ jmp _return_T_done\@
|
|
|
|
+
|
|
|
|
+_T_16\@:
|
|
|
|
+ vmovdqu %xmm9, (%r10)
|
|
|
|
+
|
|
|
|
+_return_T_done\@:
|
|
|
|
+ mov %r14, %rsp
|
|
|
|
+
|
|
|
|
+ pop %r15
|
|
|
|
+ pop %r14
|
|
|
|
+ pop %r13
|
|
|
|
+ pop %r12
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+#############################################################
|
|
|
|
+#void aesni_gcm_precomp_avx_gen2
|
|
|
|
+# (gcm_data *my_ctx_data,
|
|
|
|
+#        u8     *hash_subkey); /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
|
|
|
|
+#############################################################
|
|
|
|
+ENTRY(aesni_gcm_precomp_avx_gen2)
|
|
|
|
+        # the number of register pushes below must match STACK_OFFSET (4 pushes = 8*4 bytes)
|
|
|
|
+ push %r12
|
|
|
|
+ push %r13
|
|
|
|
+ push %r14
|
|
|
|
+ push %r15
|
|
|
|
+
|
|
|
|
+ mov %rsp, %r14
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ sub $VARIABLE_OFFSET, %rsp
|
|
|
|
+ and $~63, %rsp # align rsp to 64 bytes
|
|
|
|
+
|
|
|
|
+ vmovdqu (arg2), %xmm6 # xmm6 = HashKey
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
|
|
|
|
+ ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
|
|
|
|
+ vmovdqa %xmm6, %xmm2
|
|
|
|
+ vpsllq $1, %xmm6, %xmm6
|
|
|
|
+ vpsrlq $63, %xmm2, %xmm2
|
|
|
|
+ vmovdqa %xmm2, %xmm1
|
|
|
|
+ vpslldq $8, %xmm2, %xmm2
|
|
|
|
+ vpsrldq $8, %xmm1, %xmm1
|
|
|
|
+ vpor %xmm2, %xmm6, %xmm6
|
|
|
|
+ #reduction
|
|
|
|
+ vpshufd $0b00100100, %xmm1, %xmm2
|
|
|
|
+ vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
|
|
|
|
+ vpand POLY(%rip), %xmm2, %xmm2
|
|
|
|
+ vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
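+        # (the sequence above computes, in effect, a hypothetical
+        #  H' = (H << 1) ^ (msb(H) ? POLY : 0), i.e. doubling H in GF(2^128))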
|
|
|
|
+ #######################################################################
|
|
|
|
+ vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
|
|
|
|
+
|
|
|
|
+ mov %r14, %rsp
|
|
|
|
+
|
|
|
|
+ pop %r15
|
|
|
|
+ pop %r14
|
|
|
|
+ pop %r13
|
|
|
|
+ pop %r12
|
|
|
|
+ ret
|
|
|
|
+ENDPROC(aesni_gcm_precomp_avx_gen2)
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+#void aesni_gcm_enc_avx_gen2(
|
|
|
|
+# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
|
|
|
|
+# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
|
|
|
|
+# const u8 *in, /* Plaintext input */
|
|
|
|
+# u64 plaintext_len, /* Length of data in Bytes for encryption. */
|
|
|
|
+# u8 *iv, /* Pre-counter block j0: 4 byte salt
|
|
|
|
+# (from Security Association) concatenated with 8 byte
|
|
|
|
+# Initialisation Vector (from IPSec ESP Payload)
|
|
|
|
+# concatenated with 0x00000001. 16-byte aligned pointer. */
|
|
|
|
+# const u8 *aad, /* Additional Authentication Data (AAD)*/
|
|
|
|
+# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
|
|
|
|
+# u8 *auth_tag, /* Authenticated Tag output. */
|
|
|
|
+#       u64     auth_tag_len); /* Authenticated Tag Length in bytes.
|
|
|
|
+# Valid values are 16 (most likely), 12 or 8. */
|
|
|
|
+###############################################################################
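+# A hypothetical call sequence from C (argument names are illustrative only;
+# the expanded AES key schedule is assumed to already be in the context):
+#
+#       aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
+#       aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv,
+#                              aad, aad_len, auth_tag, auth_tag_len);
+#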
|
|
|
|
+ENTRY(aesni_gcm_enc_avx_gen2)
|
|
|
|
+ GCM_ENC_DEC_AVX ENC
|
|
|
|
+ ret
|
|
|
|
+ENDPROC(aesni_gcm_enc_avx_gen2)
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+#void aesni_gcm_dec_avx_gen2(
|
|
|
|
+# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
|
|
|
|
+# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
|
|
|
|
+# const u8 *in, /* Ciphertext input */
|
|
|
|
+# u64 plaintext_len, /* Length of data in Bytes for encryption. */
|
|
|
|
+# u8 *iv, /* Pre-counter block j0: 4 byte salt
|
|
|
|
+# (from Security Association) concatenated with 8 byte
|
|
|
|
+# Initialisation Vector (from IPSec ESP Payload)
|
|
|
|
+# concatenated with 0x00000001. 16-byte aligned pointer. */
|
|
|
|
+# const u8 *aad, /* Additional Authentication Data (AAD)*/
|
|
|
|
+# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
|
|
|
|
+# u8 *auth_tag, /* Authenticated Tag output. */
|
|
|
|
+#       u64     auth_tag_len); /* Authenticated Tag Length in bytes.
|
|
|
|
+# Valid values are 16 (most likely), 12 or 8. */
|
|
|
|
+###############################################################################
|
|
|
|
+ENTRY(aesni_gcm_dec_avx_gen2)
|
|
|
|
+ GCM_ENC_DEC_AVX DEC
|
|
|
|
+ ret
|
|
|
|
+ENDPROC(aesni_gcm_dec_avx_gen2)
|
|
|
|
+#endif /* CONFIG_AS_AVX */
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_AS_AVX2
|
|
|
|
+###############################################################################
|
|
|
|
+# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
|
|
|
|
+# Input: A and B (128-bits each, bit-reflected)
|
|
|
|
+# Output: C = A*B*x mod poly, (i.e. >>1 )
|
|
|
|
+# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
|
|
|
|
+# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
|
|
|
|
+###############################################################################
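+# Illustrative sketch (C intrinsics, hypothetical variable names): the four
+# quadword products formed below correspond roughly to
+#     hi  = _mm_clmulepi64_si128(a, b, 0x11);                /* a1*b1 */
+#     lo  = _mm_clmulepi64_si128(a, b, 0x00);                /* a0*b0 */
+#     mid = _mm_xor_si128(_mm_clmulepi64_si128(a, b, 0x01),
+#                         _mm_clmulepi64_si128(a, b, 0x10)); /* cross terms */
+# after which <hi:lo> is adjusted with mid and reduced modulo the polynomial.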
|
|
|
|
+.macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1
|
|
|
|
+ vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0
|
|
|
|
+ vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0
|
|
|
|
+ vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1
|
|
|
|
+ vpxor \T3, \GH, \GH
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs
|
|
|
|
+ vpslldq $8 , \GH, \GH # shift-L GH 2 DWs
|
|
|
|
+
|
|
|
|
+ vpxor \T3, \T1, \T1
|
|
|
|
+ vpxor \T2, \GH, \GH
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #first phase of the reduction
|
|
|
|
+ vmovdqa POLY2(%rip), \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \GH, \T3, \T2
|
|
|
|
+ vpslldq $8, \T2, \T2 # shift-L T2 2 DWs
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \GH, \GH # first phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+ #second phase of the reduction
|
|
|
|
+ vpclmulqdq $0x00, \GH, \T3, \T2
|
|
|
|
+ vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \GH, \T3, \GH
|
|
|
|
+ vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \GH, \GH # second phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+ vpxor \T1, \GH, \GH # the result is in GH
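+        # Note: the two reduction phases above fold the upper half of the
+        # 256-bit carry-less product <T1:GH> back into the lower 128 bits
+        # modulo x^128 + x^127 + x^126 + x^121 + 1, using pclmulqdq against
+        # the precomputed constant POLY2 rather than explicit shift-and-XOR steps.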
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+.endm
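+
+# PRECOMPUTE_AVX2 below generates HashKey^2 .. HashKey^8 (each already <<1 mod
+# poly) so that the 8-way parallel GHASH can multiply eight ciphertext blocks
+# by consecutive powers of the hash key and perform a single reduction per
+# group of eight.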
|
|
|
|
+
|
|
|
|
+.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
|
|
|
|
+
|
|
|
|
+        # Hashkey_i_k holds XORed values of the low and high parts of the Hashkey_i
|
|
|
|
+ vmovdqa \HK, \T5
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_3(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_4(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_5(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_6(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_7(arg1)
|
|
|
|
+
|
|
|
|
+ GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
|
|
|
|
+ vmovdqa \T5, HashKey_8(arg1)
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+## if a = number of total plaintext bytes
|
|
|
|
+## b = floor(a/16)
|
|
|
|
+## num_initial_blocks = b mod 8
|
|
|
|
+## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
|
|
|
|
+## r10, r11, r12, rax are clobbered
|
|
|
|
+## arg1, arg2, arg3, r14 are used as pointers only, not modified
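+## Note: the AAD is folded into the GHASH state here as well, and when at
+## least 128 bytes of data remain the first full group of 8 counter blocks is
+## also encrypted, so the main loop always has a previous group to hash.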
|
|
|
|
+
|
|
|
|
+.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
|
|
|
|
+ i = (8-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+
|
|
|
|
+ mov arg6, %r10 # r10 = AAD
|
|
|
|
+ mov arg7, %r12 # r12 = aadLen
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ mov %r12, %r11
|
|
|
|
+
|
|
|
|
+ vpxor reg_i, reg_i, reg_i
|
|
|
|
+_get_AAD_loop\@:
|
|
|
|
+ vmovd (%r10), \T1
|
|
|
|
+ vpslldq $12, \T1, \T1
|
|
|
|
+ vpsrldq $4, reg_i, reg_i
|
|
|
|
+ vpxor \T1, reg_i, reg_i
|
|
|
|
+
|
|
|
|
+ add $4, %r10
|
|
|
|
+ sub $4, %r12
|
|
|
|
+ jg _get_AAD_loop\@
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ cmp $16, %r11
|
|
|
|
+ je _get_AAD_loop2_done\@
|
|
|
|
+ mov $16, %r12
|
|
|
|
+
|
|
|
|
+_get_AAD_loop2\@:
|
|
|
|
+ vpsrldq $4, reg_i, reg_i
|
|
|
|
+ sub $4, %r12
|
|
|
|
+ cmp %r11, %r12
|
|
|
|
+ jg _get_AAD_loop2\@
|
|
|
|
+
|
|
|
|
+_get_AAD_loop2_done\@:
|
|
|
|
+
|
|
|
|
+ #byte-reflect the AAD data
|
|
|
|
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i
|
|
|
|
+
|
|
|
|
+ # initialize the data pointer offset as zero
|
|
|
|
+ xor %r11, %r11
|
|
|
|
+
|
|
|
|
+ # start AES for num_initial_blocks blocks
|
|
|
|
+ mov arg5, %rax # rax = *Y0
|
|
|
|
+ vmovdqu (%rax), \CTR # CTR = Y0
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \CTR, \CTR
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, reg_i
|
|
|
|
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ vmovdqa (arg1), \T_key
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vpxor \T_key, reg_i, reg_i
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ j = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 9
|
|
|
|
+ vmovdqa 16*j(arg1), \T_key
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vaesenc \T_key, reg_i, reg_i
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ j = (j+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqa 16*10(arg1), \T_key
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vaesenclast \T_key, reg_i, reg_i
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+ i = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vmovdqu (arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, reg_i, reg_i
|
|
|
|
+ vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for
|
|
|
|
+ # num_initial_blocks blocks
|
|
|
|
+ add $16, %r11
|
|
|
|
+.if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, reg_i
|
|
|
|
+.endif
|
|
|
|
+ vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ i = (8-\num_initial_blocks)
|
|
|
|
+ j = (9-\num_initial_blocks)
|
|
|
|
+ setreg
|
|
|
|
+ GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6
|
|
|
|
+
|
|
|
|
+.rep \num_initial_blocks
|
|
|
|
+ vpxor reg_i, reg_j, reg_j
|
|
|
|
+ GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks
|
|
|
|
+ i = (i+1)
|
|
|
|
+ j = (j+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+ # XMM8 has the combined result here
|
|
|
|
+
|
|
|
|
+ vmovdqa \XMM8, TMP1(%rsp)
|
|
|
|
+ vmovdqa \XMM8, \T3
|
|
|
|
+
|
|
|
|
+ cmp $128, %r13
|
|
|
|
+ jl _initial_blocks_done\@ # no need for precomputed constants
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+# prepare the next 8 counter blocks and perform AES rounds on them (first full group of 8 blocks)
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM1
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM2
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM3
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM4
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM5
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM6
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM7
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \CTR # INCR Y0
|
|
|
|
+ vmovdqa \CTR, \XMM8
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ vmovdqa (arg1), \T_key
|
|
|
|
+ vpxor \T_key, \XMM1, \XMM1
|
|
|
|
+ vpxor \T_key, \XMM2, \XMM2
|
|
|
|
+ vpxor \T_key, \XMM3, \XMM3
|
|
|
|
+ vpxor \T_key, \XMM4, \XMM4
|
|
|
|
+ vpxor \T_key, \XMM5, \XMM5
|
|
|
|
+ vpxor \T_key, \XMM6, \XMM6
|
|
|
|
+ vpxor \T_key, \XMM7, \XMM7
|
|
|
|
+ vpxor \T_key, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ i = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 9 # do 9 rounds
|
|
|
|
+ vmovdqa 16*i(arg1), \T_key
|
|
|
|
+ vaesenc \T_key, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T_key, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T_key, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T_key, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T_key, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T_key, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T_key, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T_key, \XMM8, \XMM8
|
|
|
|
+ i = (i+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqa 16*i(arg1), \T_key
|
|
|
|
+ vaesenclast \T_key, \XMM1, \XMM1
|
|
|
|
+ vaesenclast \T_key, \XMM2, \XMM2
|
|
|
|
+ vaesenclast \T_key, \XMM3, \XMM3
|
|
|
|
+ vaesenclast \T_key, \XMM4, \XMM4
|
|
|
|
+ vaesenclast \T_key, \XMM5, \XMM5
|
|
|
|
+ vaesenclast \T_key, \XMM6, \XMM6
|
|
|
|
+ vaesenclast \T_key, \XMM7, \XMM7
|
|
|
|
+ vaesenclast \T_key, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqu (arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM1, \XMM1
|
|
|
|
+ vmovdqu \XMM1, (arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM1
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*1(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM2, \XMM2
|
|
|
|
+ vmovdqu \XMM2, 16*1(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM2
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*2(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM3, \XMM3
|
|
|
|
+ vmovdqu \XMM3, 16*2(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM3
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*3(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM4, \XMM4
|
|
|
|
+ vmovdqu \XMM4, 16*3(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM4
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*4(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM5, \XMM5
|
|
|
|
+ vmovdqu \XMM5, 16*4(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM5
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*5(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM6, \XMM6
|
|
|
|
+ vmovdqu \XMM6, 16*5(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM6
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*6(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM7, \XMM7
|
|
|
|
+ vmovdqu \XMM7, 16*6(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM7
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*7(arg3, %r11), \T1
|
|
|
|
+ vpxor \T1, \XMM8, \XMM8
|
|
|
|
+ vmovdqu \XMM8, 16*7(arg2 , %r11)
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa \T1, \XMM8
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ add $128, %r11
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+ vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with
|
|
|
|
+ # the corresponding ciphertext
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+
|
|
|
|
+_initial_blocks_done\@:
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# encrypt 8 blocks at a time
|
|
|
|
+# ghash the 8 previously encrypted ciphertext blocks
|
|
|
|
+# arg1, arg2, arg3 are used as pointers only, not modified
|
|
|
|
+# r11 is the data offset value
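+# Note: the AES rounds for the 8 new counter blocks are interleaved with the
+# carry-less multiplies that GHASH the 8 ciphertext blocks from the previous
+# iteration (saved in \T2 and TMP2..TMP8), so the two instruction streams
+# overlap and hide each other's latency.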
|
|
|
|
+.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
|
|
|
|
+
|
|
|
|
+ vmovdqa \XMM1, \T2
|
|
|
|
+ vmovdqa \XMM2, TMP2(%rsp)
|
|
|
|
+ vmovdqa \XMM3, TMP3(%rsp)
|
|
|
|
+ vmovdqa \XMM4, TMP4(%rsp)
|
|
|
|
+ vmovdqa \XMM5, TMP5(%rsp)
|
|
|
|
+ vmovdqa \XMM6, TMP6(%rsp)
|
|
|
|
+ vmovdqa \XMM7, TMP7(%rsp)
|
|
|
|
+ vmovdqa \XMM8, TMP8(%rsp)
|
|
|
|
+
|
|
|
|
+.if \loop_idx == in_order
|
|
|
|
+ vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT
|
|
|
|
+ vpaddd ONE(%rip), \XMM1, \XMM2
|
|
|
|
+ vpaddd ONE(%rip), \XMM2, \XMM3
|
|
|
|
+ vpaddd ONE(%rip), \XMM3, \XMM4
|
|
|
|
+ vpaddd ONE(%rip), \XMM4, \XMM5
|
|
|
|
+ vpaddd ONE(%rip), \XMM5, \XMM6
|
|
|
|
+ vpaddd ONE(%rip), \XMM6, \XMM7
|
|
|
|
+ vpaddd ONE(%rip), \XMM7, \XMM8
|
|
|
|
+ vmovdqa \XMM8, \CTR
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+.else
|
|
|
|
+ vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT
|
|
|
|
+ vpaddd ONEf(%rip), \XMM1, \XMM2
|
|
|
|
+ vpaddd ONEf(%rip), \XMM2, \XMM3
|
|
|
|
+ vpaddd ONEf(%rip), \XMM3, \XMM4
|
|
|
|
+ vpaddd ONEf(%rip), \XMM4, \XMM5
|
|
|
|
+ vpaddd ONEf(%rip), \XMM5, \XMM6
|
|
|
|
+ vpaddd ONEf(%rip), \XMM6, \XMM7
|
|
|
|
+ vpaddd ONEf(%rip), \XMM7, \XMM8
|
|
|
|
+ vmovdqa \XMM8, \CTR
|
|
|
|
+.endif
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqu (arg1), \T1
|
|
|
|
+ vpxor \T1, \XMM1, \XMM1
|
|
|
|
+ vpxor \T1, \XMM2, \XMM2
|
|
|
|
+ vpxor \T1, \XMM3, \XMM3
|
|
|
|
+ vpxor \T1, \XMM4, \XMM4
|
|
|
|
+ vpxor \T1, \XMM5, \XMM5
|
|
|
|
+ vpxor \T1, \XMM6, \XMM6
|
|
|
|
+ vpxor \T1, \XMM7, \XMM7
|
|
|
|
+ vpxor \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*1(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*2(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_8(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1
|
|
|
|
+ vpxor \T5, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*3(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP2(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_7(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*4(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP3(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_6(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*5(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP4(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_5(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*6(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP5(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_4(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*7(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP6(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_3(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*8(arg1), \T1
|
|
|
|
+ vaesenc \T1, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T1, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T1, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T1, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T1, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T1, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T1, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T1, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP7(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey_2(arg1), \T5
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T4
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*9(arg1), \T5
|
|
|
|
+ vaesenc \T5, \XMM1, \XMM1
|
|
|
|
+ vaesenc \T5, \XMM2, \XMM2
|
|
|
|
+ vaesenc \T5, \XMM3, \XMM3
|
|
|
|
+ vaesenc \T5, \XMM4, \XMM4
|
|
|
|
+ vaesenc \T5, \XMM5, \XMM5
|
|
|
|
+ vaesenc \T5, \XMM6, \XMM6
|
|
|
|
+ vaesenc \T5, \XMM7, \XMM7
|
|
|
|
+ vaesenc \T5, \XMM8, \XMM8
|
|
|
|
+
|
|
|
|
+ vmovdqa TMP8(%rsp), \T1
|
|
|
|
+ vmovdqa HashKey(arg1), \T5
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \T1, \T3
|
|
|
|
+ vpxor \T3, \T4, \T1
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqu 16*10(arg1), \T5
|
|
|
|
+
|
|
|
|
+ i = 0
|
|
|
|
+ j = 1
|
|
|
|
+ setreg
|
|
|
|
+.rep 8
|
|
|
|
+ vpxor 16*i(arg3, %r11), \T5, \T2
|
|
|
|
+ .if \ENC_DEC == ENC
|
|
|
|
+ vaesenclast \T2, reg_j, reg_j
|
|
|
|
+ .else
|
|
|
|
+ vaesenclast \T2, reg_j, \T3
|
|
|
|
+ vmovdqu 16*i(arg3, %r11), reg_j
|
|
|
|
+ vmovdqu \T3, 16*i(arg2, %r11)
|
|
|
|
+ .endif
|
|
|
|
+ i = (i+1)
|
|
|
|
+ j = (j+1)
|
|
|
|
+ setreg
|
|
|
|
+.endr
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+        vpslldq $8, \T6, \T3        # shift-L T6 2 DWs
|
|
|
|
+        vpsrldq $8, \T6, \T6        # shift-R T6 2 DWs
|
|
|
|
+ vpxor \T3, \T7, \T7
|
|
|
|
+ vpxor \T6, \T1, \T1 # accumulate the results in T1:T7
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #first phase of the reduction
|
|
|
|
+ vmovdqa POLY2(%rip), \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T7, \T3, \T2
|
|
|
|
+ vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \T7, \T7 # first phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+ .if \ENC_DEC == ENC
|
|
|
|
+ vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
|
|
|
|
+ .endif
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #second phase of the reduction
|
|
|
|
+ vpclmulqdq $0x00, \T7, \T3, \T2
|
|
|
|
+ vpsrldq $4, \T2, \T2 # shift-R xmm2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T7, \T3, \T4
|
|
|
|
+ vpslldq $4, \T4, \T4 # shift-L xmm0 1 DW (Shift-L 1-DW to obtain result with no shifts)
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \T4, \T4 # second phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+ vpxor \T4, \T1, \T1 # the result is in T1
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap
|
|
|
|
+ vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpxor \T1, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# GHASH the last 8 ciphertext blocks.
|
|
|
|
+.macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
|
|
|
|
+
|
|
|
|
+ ## Karatsuba Method
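+        ## (three carry-less multiplies per block: a1*b1, a0*b0 and
+        ##  (a1^a0)*(b1^b0); the middle term of the schoolbook product is
+        ##  recovered at the end by XORing the other two partial sums back in)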
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_8(arg1), \T5
|
|
|
|
+
|
|
|
|
+ vpshufd $0b01001110, \XMM1, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM1, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM1, \T6
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM1, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_7(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM2, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM2, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM2, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM2, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_6(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM3, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM3, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM3, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM3, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_5(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM4, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM4, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM4, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM4, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_4(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM5, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM5, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM5, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM5, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_3(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM6, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM6, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM6, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM6, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey_2(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM7, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM7, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM7, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM7, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+
|
|
|
|
+ ######################
|
|
|
|
+
|
|
|
|
+ vmovdqa HashKey(arg1), \T5
|
|
|
|
+ vpshufd $0b01001110, \XMM8, \T2
|
|
|
|
+ vpshufd $0b01001110, \T5, \T3
|
|
|
|
+ vpxor \XMM8, \T2, \T2
|
|
|
|
+ vpxor \T5, \T3, \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x11, \T5, \XMM8, \T4
|
|
|
|
+ vpxor \T4, \T6, \T6
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T5, \XMM8, \T4
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x00, \T3, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \XMM1, \XMM1
|
|
|
|
+ vpxor \T6, \XMM1, \XMM1
|
|
|
|
+ vpxor \T7, \XMM1, \T2
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpslldq $8, \T2, \T4
|
|
|
|
+ vpsrldq $8, \T2, \T2
|
|
|
|
+
|
|
|
|
+ vpxor \T4, \T7, \T7
|
|
|
|
+ vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the
|
|
|
|
+ # accumulated carry-less multiplications
|
|
|
|
+
|
|
|
|
+ #######################################################################
|
|
|
|
+ #first phase of the reduction
|
|
|
|
+ vmovdqa POLY2(%rip), \T3
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x01, \T7, \T3, \T2
|
|
|
|
+ vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \T7, \T7 # first phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #second phase of the reduction
|
|
|
|
+ vpclmulqdq $0x00, \T7, \T3, \T2
|
|
|
|
+ vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)
|
|
|
|
+
|
|
|
|
+ vpclmulqdq $0x10, \T7, \T3, \T4
|
|
|
|
+ vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)
|
|
|
|
+
|
|
|
|
+ vpxor \T2, \T4, \T4 # second phase of the reduction complete
|
|
|
|
+ #######################################################################
|
|
|
|
+ vpxor \T4, \T6, \T6 # the result is in T6
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# combined for GCM encrypt and decrypt functions
|
|
|
|
+# clobbering all xmm registers
|
|
|
|
+# clobbering r10, r11, r12, r13, r14, r15
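+# Flow: set up the stack frame, consume the AAD and the initial
+# (num_blocks mod 8) blocks via INITIAL_BLOCKS_AVX2, run the 8-blocks-at-a-time
+# loop (GHASH_8_ENCRYPT_8_PARALLEL_AVX2, then GHASH_LAST_8_AVX2), handle a
+# trailing partial block, hash the length block and emit the authentication tag.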
|
|
|
|
+.macro GCM_ENC_DEC_AVX2 ENC_DEC
|
|
|
|
+
|
|
|
|
+ #the number of pushes must equal STACK_OFFSET
|
|
|
|
+ push %r12
|
|
|
|
+ push %r13
|
|
|
|
+ push %r14
|
|
|
|
+ push %r15
|
|
|
|
+
|
|
|
|
+ mov %rsp, %r14
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ sub $VARIABLE_OFFSET, %rsp
|
|
|
|
+ and $~63, %rsp # align rsp to 64 bytes
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
|
|
|
|
+
|
|
|
|
+ mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
|
|
|
|
+ and $-16, %r13 # r13 = r13 - (r13 mod 16)
|
|
|
|
+
|
|
|
|
+ mov %r13, %r12
|
|
|
|
+ shr $4, %r12
|
|
|
|
+ and $7, %r12
|
|
|
|
+ jz _initial_num_blocks_is_0\@
|
|
|
|
+
|
|
|
|
+ cmp $7, %r12
|
|
|
|
+ je _initial_num_blocks_is_7\@
|
|
|
|
+ cmp $6, %r12
|
|
|
|
+ je _initial_num_blocks_is_6\@
|
|
|
|
+ cmp $5, %r12
|
|
|
|
+ je _initial_num_blocks_is_5\@
|
|
|
|
+ cmp $4, %r12
|
|
|
|
+ je _initial_num_blocks_is_4\@
|
|
|
|
+ cmp $3, %r12
|
|
|
|
+ je _initial_num_blocks_is_3\@
|
|
|
|
+ cmp $2, %r12
|
|
|
|
+ je _initial_num_blocks_is_2\@
|
|
|
|
+
|
|
|
|
+ jmp _initial_num_blocks_is_1\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_7\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*7, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_6\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*6, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_5\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*5, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_4\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*4, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_3\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*3, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_2\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*2, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_1\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+ sub $16*1, %r13
|
|
|
|
+ jmp _initial_blocks_encrypted\@
|
|
|
|
+
|
|
|
|
+_initial_num_blocks_is_0\@:
|
|
|
|
+ INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_initial_blocks_encrypted\@:
|
|
|
|
+ cmp $0, %r13
|
|
|
|
+ je _zero_cipher_left\@
|
|
|
|
+
|
|
|
|
+ sub $128, %r13
|
|
|
|
+ je _eight_cipher_left\@
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vmovd %xmm9, %r15d
|
|
|
|
+ and $255, %r15d
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_encrypt_by_8_new\@:
|
|
|
|
+ cmp $(255-8), %r15d
|
|
|
|
+ jg _encrypt_by_8\@
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ add $8, %r15b
|
|
|
|
+ GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
|
|
|
|
+ add $128, %r11
|
|
|
|
+ sub $128, %r13
|
|
|
|
+ jne _encrypt_by_8_new\@
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ jmp _eight_cipher_left\@
|
|
|
|
+
|
|
|
|
+_encrypt_by_8\@:
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ add $8, %r15b
|
|
|
|
+ GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ add $128, %r11
|
|
|
|
+ sub $128, %r13
|
|
|
|
+ jne _encrypt_by_8_new\@
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_eight_cipher_left\@:
|
|
|
|
+ GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_zero_cipher_left\@:
|
|
|
|
+ cmp $16, arg4
|
|
|
|
+ jl _only_less_than_16\@
|
|
|
|
+
|
|
|
|
+ mov arg4, %r13
|
|
|
|
+ and $15, %r13 # r13 = (arg4 mod 16)
|
|
|
|
+
|
|
|
|
+ je _multiple_of_16_bytes\@
|
|
|
|
+
|
|
|
|
+        # handle the last <16 Byte block separately
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
|
|
|
|
+
|
|
|
|
+ sub $16, %r11
|
|
|
|
+ add %r13, %r11
|
|
|
|
+ vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
|
|
|
|
+
|
|
|
|
+ lea SHIFT_MASK+16(%rip), %r12
|
|
|
|
+ sub %r13, %r12 # adjust the shuffle mask pointer
|
|
|
|
+ # to be able to shift 16-r13 bytes
|
|
|
|
+ # (r13 is the number of bytes in plaintext mod 16)
|
|
|
|
+ vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
|
|
|
|
+ vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
|
|
|
|
+ jmp _final_ghash_mul\@
|
|
|
|
+
|
|
|
|
+_only_less_than_16\@:
|
|
|
|
+ # check for 0 length
|
|
|
|
+ mov arg4, %r13
|
|
|
|
+ and $15, %r13 # r13 = (arg4 mod 16)
|
|
|
|
+
|
|
|
|
+ je _multiple_of_16_bytes\@
|
|
|
|
+
|
|
|
|
+        # handle the last <16 Byte block separately
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ lea SHIFT_MASK+16(%rip), %r12
|
|
|
|
+ sub %r13, %r12 # adjust the shuffle mask pointer to be
|
|
|
|
+ # able to shift 16-r13 bytes (r13 is the
|
|
|
|
+ # number of bytes in plaintext mod 16)
|
|
|
|
+
|
|
|
|
+_get_last_16_byte_loop\@:
|
|
|
|
+ movb (arg3, %r11), %al
|
|
|
|
+ movb %al, TMP1 (%rsp , %r11)
|
|
|
|
+ add $1, %r11
|
|
|
|
+ cmp %r13, %r11
|
|
|
|
+ jne _get_last_16_byte_loop\@
|
|
|
|
+
|
|
|
|
+ vmovdqu TMP1(%rsp), %xmm1
|
|
|
|
+
|
|
|
|
+ sub $16, %r11
|
|
|
|
+
|
|
|
|
+_final_ghash_mul\@:
|
|
|
|
+ .if \ENC_DEC == DEC
|
|
|
|
+ vmovdqa %xmm1, %xmm2
|
|
|
|
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
|
|
|
|
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpand %xmm1, %xmm2, %xmm2
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
|
|
|
|
+ vpxor %xmm2, %xmm14, %xmm14
|
|
|
|
+ #GHASH computation for the last <16 Byte block
|
|
|
|
+ GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
|
|
|
|
+ sub %r13, %r11
|
|
|
|
+ add $16, %r11
|
|
|
|
+ .else
|
|
|
|
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
|
|
|
|
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
|
|
|
|
+ vpxor %xmm9, %xmm14, %xmm14
|
|
|
|
+ #GHASH computation for the last <16 Byte block
|
|
|
|
+ GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
|
|
|
|
+ sub %r13, %r11
|
|
|
|
+ add $16, %r11
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
|
|
|
|
+ .endif
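+        # In both branches the bytes beyond the r13 valid ones are masked off
+        # with the ALL_F/SHIFT_MASK table before the block is byte-reflected
+        # and folded into xmm14 (the running GHASH state); only the data that
+        # gets hashed differs: the incoming ciphertext for DEC, the freshly
+        # produced ciphertext in xmm9 for ENC.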
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ #############################
|
|
|
|
+ # output r13 Bytes
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ cmp $8, %r13
|
|
|
|
+ jle _less_than_8_bytes_left\@
|
|
|
|
+
|
|
|
|
+ mov %rax, (arg2 , %r11)
|
|
|
|
+ add $8, %r11
|
|
|
|
+ vpsrldq $8, %xmm9, %xmm9
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ sub $8, %r13
|
|
|
|
+
|
|
|
|
+_less_than_8_bytes_left\@:
|
|
|
|
+ movb %al, (arg2 , %r11)
|
|
|
|
+ add $1, %r11
|
|
|
|
+ shr $8, %rax
|
|
|
|
+ sub $1, %r13
|
|
|
|
+ jne _less_than_8_bytes_left\@
|
|
|
|
+ #############################
|
|
|
|
+
|
|
|
|
+_multiple_of_16_bytes\@:
|
|
|
|
+ mov arg7, %r12 # r12 = aadLen (number of bytes)
|
|
|
|
+ shl $3, %r12 # convert into number of bits
|
|
|
|
+ vmovd %r12d, %xmm15 # len(A) in xmm15
|
|
|
|
+
|
|
|
|
+        shl     $3, arg4        # len(C) in bits (*8)
|
|
|
|
+ vmovq arg4, %xmm1
|
|
|
|
+ vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
|
|
|
|
+ vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
|
|
|
|
+
|
|
|
|
+ vpxor %xmm15, %xmm14, %xmm14
|
|
|
|
+ GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
|
|
|
|
+
|
|
|
|
+ mov arg5, %rax # rax = *Y0
|
|
|
|
+ vmovdqu (%rax), %xmm9 # xmm9 = Y0
|
|
|
|
+
|
|
|
|
+ ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
|
|
|
|
+
|
|
|
|
+ vpxor %xmm14, %xmm9, %xmm9
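+        # xmm9 = E(K, Y0) ^ GHASH(AAD, C, len(A)||len(C)), i.e. the GCM
+        # authentication tag; _return_T below stores 8, 12 or 16 bytes of it
+        # according to auth_tag_len.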
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+_return_T\@:
|
|
|
|
+ mov arg8, %r10 # r10 = authTag
|
|
|
|
+ mov arg9, %r11 # r11 = auth_tag_len
|
|
|
|
+
|
|
|
|
+ cmp $16, %r11
|
|
|
|
+ je _T_16\@
|
|
|
|
+
|
|
|
|
+ cmp $12, %r11
|
|
|
|
+ je _T_12\@
|
|
|
|
+
|
|
|
|
+_T_8\@:
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ mov %rax, (%r10)
|
|
|
|
+ jmp _return_T_done\@
|
|
|
|
+_T_12\@:
|
|
|
|
+ vmovq %xmm9, %rax
|
|
|
|
+ mov %rax, (%r10)
|
|
|
|
+ vpsrldq $8, %xmm9, %xmm9
|
|
|
|
+ vmovd %xmm9, %eax
|
|
|
|
+ mov %eax, 8(%r10)
|
|
|
|
+ jmp _return_T_done\@
|
|
|
|
+
|
|
|
|
+_T_16\@:
|
|
|
|
+ vmovdqu %xmm9, (%r10)
|
|
|
|
+
|
|
|
|
+_return_T_done\@:
|
|
|
|
+ mov %r14, %rsp
|
|
|
|
+
|
|
|
|
+ pop %r15
|
|
|
|
+ pop %r14
|
|
|
|
+ pop %r13
|
|
|
|
+ pop %r12
|
|
|
|
+.endm
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+#############################################################
|
|
|
|
+#void aesni_gcm_precomp_avx_gen4
|
|
|
|
+# (gcm_data *my_ctx_data,
|
|
|
|
+# u8 *hash_subkey)# /* H, the Hash sub key input.
|
|
|
|
+# Data starts on a 16-byte boundary. */
|
|
|
|
+#############################################################
|
|
|
|
+ENTRY(aesni_gcm_precomp_avx_gen4)
|
|
|
|
+ #the number of pushes must equal STACK_OFFSET
|
|
|
|
+ push %r12
|
|
|
|
+ push %r13
|
|
|
|
+ push %r14
|
|
|
|
+ push %r15
|
|
|
|
+
|
|
|
|
+ mov %rsp, %r14
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ sub $VARIABLE_OFFSET, %rsp
|
|
|
|
+ and $~63, %rsp # align rsp to 64 bytes
|
|
|
|
+
|
|
|
|
+ vmovdqu (arg2), %xmm6 # xmm6 = HashKey
|
|
|
|
+
|
|
|
|
+ vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
|
|
|
|
+ ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
|
|
|
|
+ vmovdqa %xmm6, %xmm2
|
|
|
|
+ vpsllq $1, %xmm6, %xmm6
|
|
|
|
+ vpsrlq $63, %xmm2, %xmm2
|
|
|
|
+ vmovdqa %xmm2, %xmm1
|
|
|
|
+ vpslldq $8, %xmm2, %xmm2
|
|
|
|
+ vpsrldq $8, %xmm1, %xmm1
|
|
|
|
+ vpor %xmm2, %xmm6, %xmm6
|
|
|
|
+ #reduction
|
|
|
|
+ vpshufd $0b00100100, %xmm1, %xmm2
|
|
|
|
+ vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
|
|
|
|
+ vpand POLY(%rip), %xmm2, %xmm2
|
|
|
|
+ vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
|
|
|
|
+ #######################################################################
|
|
|
|
+ vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
|
|
|
|
+
|
|
|
|
+ mov %r14, %rsp
|
|
|
|
+
|
|
|
|
+ pop %r15
|
|
|
|
+ pop %r14
|
|
|
|
+ pop %r13
|
|
|
|
+ pop %r12
|
|
|
|
+ ret
|
|
|
|
+ENDPROC(aesni_gcm_precomp_avx_gen4)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+#void aesni_gcm_enc_avx_gen4(
|
|
|
|
+# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
|
|
|
|
+# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
|
|
|
|
+# const u8 *in, /* Plaintext input */
|
|
|
|
+# u64 plaintext_len, /* Length of data in Bytes for encryption. */
|
|
|
|
+# u8 *iv, /* Pre-counter block j0: 4 byte salt
|
|
|
|
+# (from Security Association) concatenated with 8 byte
|
|
|
|
+# Initialisation Vector (from IPSec ESP Payload)
|
|
|
|
+# concatenated with 0x00000001. 16-byte aligned pointer. */
|
|
|
|
+# const u8 *aad, /* Additional Authentication Data (AAD)*/
|
|
|
|
+# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
|
|
|
|
+# u8 *auth_tag, /* Authenticated Tag output. */
|
|
|
|
+# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
|
|
|
|
+# Valid values are 16 (most likely), 12 or 8. */
|
|
|
|
+###############################################################################
|
|
|
|
+ENTRY(aesni_gcm_enc_avx_gen4)
|
|
|
|
+ GCM_ENC_DEC_AVX2 ENC
|
|
|
|
+ ret
|
|
|
|
+ENDPROC(aesni_gcm_enc_avx_gen4)
|
|
|
|
+
|
|
|
|
+###############################################################################
|
|
|
|
+#void aesni_gcm_dec_avx_gen4(
|
|
|
|
+# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
|
|
|
|
+# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
|
|
|
|
+# const u8 *in, /* Ciphertext input */
|
|
|
|
+# u64 plaintext_len, /* Length of data in Bytes for decryption. */
|
|
|
|
+# u8 *iv, /* Pre-counter block j0: 4 byte salt
|
|
|
|
+# (from Security Association) concatenated with 8 byte
|
|
|
|
+# Initialisation Vector (from IPSec ESP Payload)
|
|
|
|
+# concatenated with 0x00000001. 16-byte aligned pointer. */
|
|
|
|
+# const u8 *aad, /* Additional Authentication Data (AAD)*/
|
|
|
|
+# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
|
|
|
|
+# u8 *auth_tag, /* Authenticated Tag output. */
|
|
|
|
+# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
|
|
|
|
+# Valid values are 16 (most likely), 12 or 8. */
|
|
|
|
+###############################################################################
|
|
|
|
+ENTRY(aesni_gcm_dec_avx_gen4)
|
|
|
|
+ GCM_ENC_DEC_AVX2 DEC
|
|
|
|
+ ret
|
|
|
|
+ENDPROC(aesni_gcm_dec_avx_gen4)
|
|
|
|
+
|
|
|
|
+#endif /* CONFIG_AS_AVX2 */
|