|
@@ -223,34 +223,34 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
|
|
|
pcmpeqd TWOONE(%rip), \TMP2
|
|
|
pand POLY(%rip), \TMP2
|
|
|
pxor \TMP2, \TMP3
|
|
|
- movdqa \TMP3, HashKey(%arg2)
|
|
|
+ movdqu \TMP3, HashKey(%arg2)
|
|
|
|
|
|
movdqa \TMP3, \TMP5
|
|
|
pshufd $78, \TMP3, \TMP1
|
|
|
pxor \TMP3, \TMP1
|
|
|
- movdqa \TMP1, HashKey_k(%arg2)
|
|
|
+ movdqu \TMP1, HashKey_k(%arg2)
|
|
|
|
|
|
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
|
|
|
# TMP5 = HashKey^2<<1 (mod poly)
|
|
|
- movdqa \TMP5, HashKey_2(%arg2)
|
|
|
+ movdqu \TMP5, HashKey_2(%arg2)
|
|
|
# HashKey_2 = HashKey^2<<1 (mod poly)
|
|
|
pshufd $78, \TMP5, \TMP1
|
|
|
pxor \TMP5, \TMP1
|
|
|
- movdqa \TMP1, HashKey_2_k(%arg2)
|
|
|
+ movdqu \TMP1, HashKey_2_k(%arg2)
|
|
|
|
|
|
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
|
|
|
# TMP5 = HashKey^3<<1 (mod poly)
|
|
|
- movdqa \TMP5, HashKey_3(%arg2)
|
|
|
+ movdqu \TMP5, HashKey_3(%arg2)
|
|
|
pshufd $78, \TMP5, \TMP1
|
|
|
pxor \TMP5, \TMP1
|
|
|
- movdqa \TMP1, HashKey_3_k(%arg2)
|
|
|
+ movdqu \TMP1, HashKey_3_k(%arg2)
|
|
|
|
|
|
GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
|
|
|
# TMP5 = HashKey^4<<1 (mod poly)
|
|
|
- movdqa \TMP5, HashKey_4(%arg2)
|
|
|
+ movdqu \TMP5, HashKey_4(%arg2)
|
|
|
pshufd $78, \TMP5, \TMP1
|
|
|
pxor \TMP5, \TMP1
|
|
|
- movdqa \TMP1, HashKey_4_k(%arg2)
|
|
|
+ movdqu \TMP1, HashKey_4_k(%arg2)
|
|
|
.endm
|
|
|
|
|
|
# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
|
|
@@ -271,7 +271,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
|
|
|
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
|
|
|
|
|
|
PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
|
|
|
- movdqa HashKey(%arg2), %xmm13
|
|
|
+ movdqu HashKey(%arg2), %xmm13
|
|
|
|
|
|
CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
|
|
|
%xmm4, %xmm5, %xmm6
|
|
@@ -997,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
pshufd $78, \XMM5, \TMP6
|
|
|
pxor \XMM5, \TMP6
|
|
|
paddd ONE(%rip), \XMM0 # INCR CNT
|
|
|
- movdqa HashKey_4(%arg2), \TMP5
|
|
|
+ movdqu HashKey_4(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
|
|
|
movdqa \XMM0, \XMM1
|
|
|
paddd ONE(%rip), \XMM0 # INCR CNT
|
|
@@ -1016,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
pxor (%arg1), \XMM2
|
|
|
pxor (%arg1), \XMM3
|
|
|
pxor (%arg1), \XMM4
|
|
|
- movdqa HashKey_4_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_4_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
|
|
|
movaps 0x10(%arg1), \TMP1
|
|
|
AESENC \TMP1, \XMM1 # Round 1
|
|
@@ -1031,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
movdqa \XMM6, \TMP1
|
|
|
pshufd $78, \XMM6, \TMP2
|
|
|
pxor \XMM6, \TMP2
|
|
|
- movdqa HashKey_3(%arg2), \TMP5
|
|
|
+ movdqu HashKey_3(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
|
|
|
movaps 0x30(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 3
|
|
@@ -1044,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
AESENC \TMP3, \XMM2
|
|
|
AESENC \TMP3, \XMM3
|
|
|
AESENC \TMP3, \XMM4
|
|
|
- movdqa HashKey_3_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_3_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movaps 0x50(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 5
|
|
@@ -1058,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
movdqa \XMM7, \TMP1
|
|
|
pshufd $78, \XMM7, \TMP2
|
|
|
pxor \XMM7, \TMP2
|
|
|
- movdqa HashKey_2(%arg2), \TMP5
|
|
|
+ movdqu HashKey_2(%arg2), \TMP5
|
|
|
|
|
|
# Multiply TMP5 * HashKey using karatsuba
|
|
|
|
|
@@ -1074,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
AESENC \TMP3, \XMM2
|
|
|
AESENC \TMP3, \XMM3
|
|
|
AESENC \TMP3, \XMM4
|
|
|
- movdqa HashKey_2_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_2_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movaps 0x80(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 8
|
|
@@ -1092,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
movdqa \XMM8, \TMP1
|
|
|
pshufd $78, \XMM8, \TMP2
|
|
|
pxor \XMM8, \TMP2
|
|
|
- movdqa HashKey(%arg2), \TMP5
|
|
|
+ movdqu HashKey(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
|
|
|
movaps 0x90(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 9
|
|
@@ -1121,7 +1121,7 @@ aes_loop_par_enc_done\@:
|
|
|
AESENCLAST \TMP3, \XMM2
|
|
|
AESENCLAST \TMP3, \XMM3
|
|
|
AESENCLAST \TMP3, \XMM4
|
|
|
- movdqa HashKey_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movdqu (%arg4,%r11,1), \TMP3
|
|
|
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
|
|
@@ -1205,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
pshufd $78, \XMM5, \TMP6
|
|
|
pxor \XMM5, \TMP6
|
|
|
paddd ONE(%rip), \XMM0 # INCR CNT
|
|
|
- movdqa HashKey_4(%arg2), \TMP5
|
|
|
+ movdqu HashKey_4(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
|
|
|
movdqa \XMM0, \XMM1
|
|
|
paddd ONE(%rip), \XMM0 # INCR CNT
|
|
@@ -1224,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
pxor (%arg1), \XMM2
|
|
|
pxor (%arg1), \XMM3
|
|
|
pxor (%arg1), \XMM4
|
|
|
- movdqa HashKey_4_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_4_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
|
|
|
movaps 0x10(%arg1), \TMP1
|
|
|
AESENC \TMP1, \XMM1 # Round 1
|
|
@@ -1239,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
movdqa \XMM6, \TMP1
|
|
|
pshufd $78, \XMM6, \TMP2
|
|
|
pxor \XMM6, \TMP2
|
|
|
- movdqa HashKey_3(%arg2), \TMP5
|
|
|
+ movdqu HashKey_3(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
|
|
|
movaps 0x30(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 3
|
|
@@ -1252,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
AESENC \TMP3, \XMM2
|
|
|
AESENC \TMP3, \XMM3
|
|
|
AESENC \TMP3, \XMM4
|
|
|
- movdqa HashKey_3_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_3_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movaps 0x50(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 5
|
|
@@ -1266,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
movdqa \XMM7, \TMP1
|
|
|
pshufd $78, \XMM7, \TMP2
|
|
|
pxor \XMM7, \TMP2
|
|
|
- movdqa HashKey_2(%arg2), \TMP5
|
|
|
+ movdqu HashKey_2(%arg2), \TMP5
|
|
|
|
|
|
# Multiply TMP5 * HashKey using karatsuba
|
|
|
|
|
@@ -1282,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
AESENC \TMP3, \XMM2
|
|
|
AESENC \TMP3, \XMM3
|
|
|
AESENC \TMP3, \XMM4
|
|
|
- movdqa HashKey_2_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_2_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movaps 0x80(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 8
|
|
@@ -1300,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
|
|
movdqa \XMM8, \TMP1
|
|
|
pshufd $78, \XMM8, \TMP2
|
|
|
pxor \XMM8, \TMP2
|
|
|
- movdqa HashKey(%arg2), \TMP5
|
|
|
+ movdqu HashKey(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
|
|
|
movaps 0x90(%arg1), \TMP3
|
|
|
AESENC \TMP3, \XMM1 # Round 9
|
|
@@ -1329,7 +1329,7 @@ aes_loop_par_dec_done\@:
|
|
|
AESENCLAST \TMP3, \XMM2
|
|
|
AESENCLAST \TMP3, \XMM3
|
|
|
AESENCLAST \TMP3, \XMM4
|
|
|
- movdqa HashKey_k(%arg2), \TMP5
|
|
|
+ movdqu HashKey_k(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movdqu (%arg4,%r11,1), \TMP3
|
|
|
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
|
|
@@ -1405,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
|
|
|
movdqa \XMM1, \TMP6
|
|
|
pshufd $78, \XMM1, \TMP2
|
|
|
pxor \XMM1, \TMP2
|
|
|
- movdqa HashKey_4(%arg2), \TMP5
|
|
|
+ movdqu HashKey_4(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
|
|
|
PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
|
|
|
- movdqa HashKey_4_k(%arg2), \TMP4
|
|
|
+ movdqu HashKey_4_k(%arg2), \TMP4
|
|
|
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
movdqa \XMM1, \XMMDst
|
|
|
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
|
|
@@ -1418,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
|
|
|
movdqa \XMM2, \TMP1
|
|
|
pshufd $78, \XMM2, \TMP2
|
|
|
pxor \XMM2, \TMP2
|
|
|
- movdqa HashKey_3(%arg2), \TMP5
|
|
|
+ movdqu HashKey_3(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
|
|
|
PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
|
|
|
- movdqa HashKey_3_k(%arg2), \TMP4
|
|
|
+ movdqu HashKey_3_k(%arg2), \TMP4
|
|
|
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
pxor \TMP1, \TMP6
|
|
|
pxor \XMM2, \XMMDst
|
|
@@ -1433,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
|
|
|
movdqa \XMM3, \TMP1
|
|
|
pshufd $78, \XMM3, \TMP2
|
|
|
pxor \XMM3, \TMP2
|
|
|
- movdqa HashKey_2(%arg2), \TMP5
|
|
|
+ movdqu HashKey_2(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
|
|
|
PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
|
|
|
- movdqa HashKey_2_k(%arg2), \TMP4
|
|
|
+ movdqu HashKey_2_k(%arg2), \TMP4
|
|
|
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
pxor \TMP1, \TMP6
|
|
|
pxor \XMM3, \XMMDst
|
|
@@ -1446,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
|
|
|
movdqa \XMM4, \TMP1
|
|
|
pshufd $78, \XMM4, \TMP2
|
|
|
pxor \XMM4, \TMP2
|
|
|
- movdqa HashKey(%arg2), \TMP5
|
|
|
+ movdqu HashKey(%arg2), \TMP5
|
|
|
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
|
|
|
PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
|
|
|
- movdqa HashKey_k(%arg2), \TMP4
|
|
|
+ movdqu HashKey_k(%arg2), \TMP4
|
|
|
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
|
|
|
pxor \TMP1, \TMP6
|
|
|
pxor \XMM4, \XMMDst
|