|
@@ -47,7 +47,7 @@
|
|
|
/**********************************************************************
|
|
|
8-way AVX cast6
|
|
|
**********************************************************************/
|
|
|
-#define CTX %rdi
|
|
|
+#define CTX %r15
|
|
|
|
|
|
#define RA1 %xmm0
|
|
|
#define RB1 %xmm1
|
|
@@ -70,8 +70,8 @@
|
|
|
|
|
|
#define RTMP %xmm15
|
|
|
|
|
|
-#define RID1 %rbp
|
|
|
-#define RID1d %ebp
|
|
|
+#define RID1 %rdi
|
|
|
+#define RID1d %edi
|
|
|
#define RID2 %rsi
|
|
|
#define RID2d %esi
|
|
|
|
|
@@ -264,15 +264,17 @@
|
|
|
.align 8
|
|
|
__cast6_enc_blk8:
|
|
|
/* input:
|
|
|
- * %rdi: ctx, CTX
|
|
|
+ * %rdi: ctx
|
|
|
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
|
|
|
* output:
|
|
|
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
|
|
|
*/
|
|
|
|
|
|
- pushq %rbp;
|
|
|
+ pushq %r15;
|
|
|
pushq %rbx;
|
|
|
|
|
|
+ movq %rdi, CTX;
|
|
|
+
|
|
|
vmovdqa .Lbswap_mask, RKM;
|
|
|
vmovd .Lfirst_mask, R1ST;
|
|
|
vmovd .L32_mask, R32;
|
|
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
|
|
|
QBAR(11);
|
|
|
|
|
|
popq %rbx;
|
|
|
- popq %rbp;
|
|
|
+ popq %r15;
|
|
|
|
|
|
vmovdqa .Lbswap_mask, RKM;
|
|
|
|
|
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
|
|
|
.align 8
|
|
|
__cast6_dec_blk8:
|
|
|
/* input:
|
|
|
- * %rdi: ctx, CTX
|
|
|
+ * %rdi: ctx
|
|
|
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
|
|
|
* output:
|
|
|
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
|
|
|
*/
|
|
|
|
|
|
- pushq %rbp;
|
|
|
+ pushq %r15;
|
|
|
pushq %rbx;
|
|
|
|
|
|
+ movq %rdi, CTX;
|
|
|
+
|
|
|
vmovdqa .Lbswap_mask, RKM;
|
|
|
vmovd .Lfirst_mask, R1ST;
|
|
|
vmovd .L32_mask, R32;
|
|
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
|
|
|
QBAR(0);
|
|
|
|
|
|
popq %rbx;
|
|
|
- popq %rbp;
|
|
|
+ popq %r15;
|
|
|
|
|
|
vmovdqa .Lbswap_mask, RKM;
|
|
|
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
|
|
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
|
|
|
|
|
|
ENTRY(cast6_ecb_enc_8way)
|
|
|
/* input:
|
|
|
- * %rdi: ctx, CTX
|
|
|
+ * %rdi: ctx
|
|
|
* %rsi: dst
|
|
|
* %rdx: src
|
|
|
*/
|
|
|
FRAME_BEGIN
|
|
|
+ pushq %r15;
|
|
|
|
|
|
+ movq %rdi, CTX;
|
|
|
movq %rsi, %r11;
|
|
|
|
|
|
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
|
|
|
|
|
|
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
|
|
|
|
+ popq %r15;
|
|
|
FRAME_END
|
|
|
ret;
|
|
|
ENDPROC(cast6_ecb_enc_8way)
|
|
|
|
|
|
ENTRY(cast6_ecb_dec_8way)
|
|
|
/* input:
|
|
|
- * %rdi: ctx, CTX
|
|
|
+ * %rdi: ctx
|
|
|
* %rsi: dst
|
|
|
* %rdx: src
|
|
|
*/
|
|
|
FRAME_BEGIN
|
|
|
+ pushq %r15;
|
|
|
|
|
|
+ movq %rdi, CTX;
|
|
|
movq %rsi, %r11;
|
|
|
|
|
|
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
|
|
|
|
|
|
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
|
|
|
|
+ popq %r15;
|
|
|
FRAME_END
|
|
|
ret;
|
|
|
ENDPROC(cast6_ecb_dec_8way)
|
|
|
|
|
|
ENTRY(cast6_cbc_dec_8way)
|
|
|
/* input:
|
|
|
- * %rdi: ctx, CTX
|
|
|
+ * %rdi: ctx
|
|
|
* %rsi: dst
|
|
|
* %rdx: src
|
|
|
*/
|
|
|
FRAME_BEGIN
|
|
|
-
|
|
|
pushq %r12;
|
|
|
+ pushq %r15;
|
|
|
|
|
|
+ movq %rdi, CTX;
|
|
|
movq %rsi, %r11;
|
|
|
movq %rdx, %r12;
|
|
|
|
|
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
|
|
|
|
|
|
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
|
|
|
|
+ popq %r15;
|
|
|
popq %r12;
|
|
|
-
|
|
|
FRAME_END
|
|
|
ret;
|
|
|
ENDPROC(cast6_cbc_dec_8way)
|
|
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
|
|
|
* %rcx: iv (little endian, 128bit)
|
|
|
*/
|
|
|
FRAME_BEGIN
|
|
|
-
|
|
|
pushq %r12;
|
|
|
+ pushq %r15
|
|
|
|
|
|
+ movq %rdi, CTX;
|
|
|
movq %rsi, %r11;
|
|
|
movq %rdx, %r12;
|
|
|
|
|
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
|
|
|
|
|
|
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
|
|
|
|
+ popq %r15;
|
|
|
popq %r12;
|
|
|
-
|
|
|
FRAME_END
|
|
|
ret;
|
|
|
ENDPROC(cast6_ctr_8way)
|
|
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
|
|
|
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
|
|
*/
|
|
|
FRAME_BEGIN
|
|
|
+ pushq %r15;
|
|
|
|
|
|
+ movq %rdi, CTX
|
|
|
movq %rsi, %r11;
|
|
|
|
|
|
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
|
|
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
|
|
|
/* dst <= regs xor IVs(in dst) */
|
|
|
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
|
|
|
|
+ popq %r15;
|
|
|
FRAME_END
|
|
|
ret;
|
|
|
ENDPROC(cast6_xts_enc_8way)
|
|
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
|
|
|
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
|
|
|
*/
|
|
|
FRAME_BEGIN
|
|
|
+ pushq %r15;
|
|
|
|
|
|
+ movq %rdi, CTX
|
|
|
movq %rsi, %r11;
|
|
|
|
|
|
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
|
|
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
|
|
|
/* dst <= regs xor IVs(in dst) */
|
|
|
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
|
|
|
|
|
|
+ popq %r15;
|
|
|
FRAME_END
|
|
|
ret;
|
|
|
ENDPROC(cast6_xts_dec_8way)
|