@@ -256,6 +256,37 @@ aad_shift_arr:
 	pxor	\TMP1, \GH		# result is in TMP1
 .endm

+# Reads DLEN bytes starting at DPTR and stores in XMMDst
+# where 0 < DLEN < 16
+# Clobbers %rax, DLEN and XMM1
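+# The bytes are placed into XMMDst in the same order as they appear in
+# memory; the remaining high-order bytes of XMMDst are zeroed.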
+.macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
+	cmp $8, \DLEN
+	jl _read_lt8_\@
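+	# DLEN >= 8: read the first 8 bytes with a single quadword load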
+	mov (\DPTR), %rax
+	MOVQ_R64_XMM %rax, \XMMDst
+	sub $8, \DLEN
+	jz _done_read_partial_block_\@
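+	# gather the remaining 1-7 bytes one at a time, highest address first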
+	xor %eax, %eax
+_read_next_byte_\@:
+	shl $8, %rax
+	mov 7(\DPTR, \DLEN, 1), %al
+	dec \DLEN
+	jnz _read_next_byte_\@
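+	# put the gathered bytes into the upper half of XMMDst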
+	MOVQ_R64_XMM %rax, \XMM1
+	pslldq $8, \XMM1
+	por \XMM1, \XMMDst
+	jmp _done_read_partial_block_\@
+_read_lt8_\@:
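+	# DLEN < 8: gather all of the bytes one at a time, highest address first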
+	xor %eax, %eax
+_read_next_byte_lt8_\@:
+	shl $8, %rax
+	mov -1(\DPTR, \DLEN, 1), %al
+	dec \DLEN
+	jnz _read_next_byte_lt8_\@
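+	# the gathered bytes land in the low half of XMMDst; the rest is zeroed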
+	MOVQ_R64_XMM %rax, \XMMDst
+_done_read_partial_block_\@:
+.endm
+
 /*
 * if a = number of total plaintext bytes
 *	b = floor(a/16)
@@ -1385,14 +1416,6 @@ _esb_loop_\@:
 *
 * AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-* from the definition of the spec, aadLen can only be 8 or 12 bytes.
-* The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-* For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 *****************************************************************************/
@@ -1486,19 +1509,16 @@ _zero_cipher_left_decrypt:
 	PSHUFB_XMM %xmm10, %xmm0

 	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	# E(K, Yn)
-	sub $16, %r11
-	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1	# receive the last <16 byte block
-	lea SHIFT_MASK+16(%rip), %r12
-	sub %r13, %r12
-# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes
-# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu (%r12), %xmm2		# get the appropriate shuffle mask
-	PSHUFB_XMM %xmm2, %xmm1		# right shift 16-%r13 butes

+	lea (%arg3,%r11,1), %r10
+	mov %r13, %r12
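+	# read the last <16 byte block of ciphertext into %xmm1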
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+	lea ALL_F+16(%rip), %r12
+	sub %r13, %r12
 	movdqa %xmm1, %xmm2
 	pxor %xmm1, %xmm0		# Ciphertext XOR E(K, Yn)
-	movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu (%r12), %xmm1
 	# get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
 	pand %xmm1, %xmm0		# mask out top 16-%r13 bytes of %xmm0
 	pand %xmm1, %xmm2
@@ -1507,9 +1527,6 @@ _zero_cipher_left_decrypt:

 	pxor %xmm2, %xmm8
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-	# GHASH computation for the last <16 byte block
-	sub %r13, %r11
-	add $16, %r11

 	# output %r13 bytes
 	MOVQ_R64_XMM %xmm0, %rax
@@ -1663,14 +1680,6 @@ ENDPROC(aesni_gcm_dec)
 *
 * AAD Format with 64-bit Extended Sequence Number
 *
-* aadLen:
-* from the definition of the spec, aadLen can only be 8 or 12 bytes.
-* The code supports 16 too but for other sizes, the code will fail.
-*
-* TLen:
-* from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
-* For other sizes, the code will fail.
-*
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 ***************************************************************************/
 ENTRY(aesni_gcm_enc)
@@ -1763,19 +1772,16 @@ _zero_cipher_left_encrypt:
 	movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0

-
 	ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1	# Encrypt(K, Yn)
-	sub $16, %r11
-	add %r13, %r11
-	movdqu (%arg3,%r11,1), %xmm1	# receive the last <16 byte blocks
-	lea SHIFT_MASK+16(%rip), %r12
+
+	lea (%arg3,%r11,1), %r10
+	mov %r13, %r12
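+	# read the last <16 byte block of plaintext into %xmm1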
+	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+	lea ALL_F+16(%rip), %r12
 	sub %r13, %r12
-	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
-	# (%r13 is the number of bytes in plaintext mod 16)
-	movdqu (%r12), %xmm2		# get the appropriate shuffle mask
-	PSHUFB_XMM %xmm2, %xmm1		# shift right 16-r13 byte
 	pxor %xmm1, %xmm0		# Plaintext XOR Encrypt(K, Yn)
-	movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+	movdqu (%r12), %xmm1
 	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
 	pand %xmm1, %xmm0		# mask out top 16-r13 bytes of xmm0
 	movdqa SHUF_MASK(%rip), %xmm10
@@ -1784,9 +1790,6 @@ _zero_cipher_left_encrypt:
 	pxor %xmm0, %xmm8
 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
 	# GHASH computation for the last <16 byte block
-	sub %r13, %r11
-	add $16, %r11
-
 	movdqa SHUF_MASK(%rip), %xmm10
 	PSHUFB_XMM %xmm10, %xmm0
