|
@@ -0,0 +1,805 @@
|
|
|
+/*
|
|
|
+ * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
|
|
|
+ *
|
|
|
+ * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
|
|
+ *
|
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
|
+ * (at your option) any later version.
|
|
|
+ *
|
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
+ * GNU General Public License for more details.
|
|
|
+ */
|
|
|
+
|
|
|
+#include <linux/linkage.h>
|
|
|
+
|
|
|
+.file "des3_ede-asm_64.S"
|
|
|
+.text
|
|
|
+
|
|
|
+/*
|
|
|
+ * Lookup-table addresses. Each table holds 64 quadwords (64*8 bytes), so
|
|
|
+ * s2..s8 are formed as successive 64*8-byte offsets from the .L_s1 label.
|
|
|
+ */
|
|
|
+#define s1 .L_s1
|
|
|
+#define s2 ((s1) + (64*8))
|
|
|
+#define s3 ((s2) + (64*8))
|
|
|
+#define s4 ((s3) + (64*8))
|
|
|
+#define s5 ((s4) + (64*8))
|
|
|
+#define s6 ((s5) + (64*8))
|
|
|
+#define s7 ((s6) + (64*8))
|
|
|
+#define s8 ((s7) + (64*8))
|
|
|
+
|
|
|
+/* register macros */
|
|
|
+/* CTX: pointer to the round keys (first function argument) */
|
|
|
+#define CTX %rdi
|
|
|
+
|
|
|
+/* RL0-RL2 / RR0-RR2: left and right halves of up to three blocks */
|
|
|
+#define RL0 %r8
|
|
|
+#define RL1 %r9
|
|
|
+#define RL2 %r10
|
|
|
+
|
|
|
+#define RL0d %r8d
|
|
|
+#define RL1d %r9d
|
|
|
+#define RL2d %r10d
|
|
|
+
|
|
|
+#define RR0 %r11
|
|
|
+#define RR1 %r12
|
|
|
+#define RR2 %r13
|
|
|
+
|
|
|
+#define RR0d %r11d
|
|
|
+#define RR1d %r12d
|
|
|
+#define RR2d %r13d
|
|
|
+
|
|
|
+/* RW0-RW2: per-block working registers, with 32-bit (d), low-byte (bl) */
|
|
|
+/* and second-byte (bh) views used to split a value into table indices */
|
|
|
+#define RW0 %rax
|
|
|
+#define RW1 %rbx
|
|
|
+#define RW2 %rcx
|
|
|
+
|
|
|
+#define RW0d %eax
|
|
|
+#define RW1d %ebx
|
|
|
+#define RW2d %ecx
|
|
|
+
|
|
|
+#define RW0bl %al
|
|
|
+#define RW1bl %bl
|
|
|
+#define RW2bl %cl
|
|
|
+
|
|
|
+#define RW0bh %ah
|
|
|
+#define RW1bh %bh
|
|
|
+#define RW2bh %ch
|
|
|
+
|
|
|
+/* RT0-RT3: temporaries. RT1 is %rbp; RT3 is %rdx, the src argument. */
|
|
|
+#define RT0 %r15
|
|
|
+#define RT1 %rbp
|
|
|
+#define RT2 %r14
|
|
|
+#define RT3 %rdx
|
|
|
+
|
|
|
+#define RT0d %r15d
|
|
|
+#define RT1d %ebp
|
|
|
+#define RT2d %r14d
|
|
|
+#define RT3d %edx
|
|
|
+
|
|
|
+/***********************************************************************
|
|
|
+ * 1-way 3DES
|
|
|
+ ***********************************************************************/
|
|
|
+/*
|
|
|
+ * do_permutation(a, b, offset, mask): exchange the bits of b selected by
|
|
|
+ * mask with the bits of a located `offset` positions higher.
|
|
|
+ * a and b are 32-bit registers; RT0d is clobbered.
|
|
|
+ */
|
|
|
+#define do_permutation(a, b, offset, mask) \
|
|
|
+ movl a, RT0d; \
|
|
|
+ shrl $(offset), RT0d; \
|
|
|
+ xorl b, RT0d; \
|
|
|
+ andl $(mask), RT0d; \
|
|
|
+ xorl RT0d, b; \
|
|
|
+ shll $(offset), RT0d; \
|
|
|
+ xorl RT0d, a;
|
|
|
+
|
|
|
+/*
|
|
|
+ * expand_to_64bits(val, mask): val = ((ror32(val##d, 4) << 32) | val) & mask.
|
|
|
+ * Assumes the upper 32 bits of val are already zero on entry (the callers
|
|
|
+ * in this file last wrote val through its 32-bit view). Clobbers RT0.
|
|
|
+ */
|
|
|
+#define expand_to_64bits(val, mask) \
|
|
|
+ movl val##d, RT0d; \
|
|
|
+ rorl $4, RT0d; \
|
|
|
+ shlq $32, RT0; \
|
|
|
+ orq RT0, val; \
|
|
|
+ andq mask, val;
|
|
|
+
|
|
|
+/*
|
|
|
+ * compress_to_64bits(val): val##d |= rol32(val >> 32, 4). Clobbers RT0.
|
|
|
+ * NOTE(review): despite the name, the result is the 32-bit value left in
|
|
|
+ * val##d; the 32-bit write clears the upper half of val.
|
|
|
+ */
|
|
|
+#define compress_to_64bits(val) \
|
|
|
+ movq val, RT0; \
|
|
|
+ shrq $32, RT0; \
|
|
|
+ roll $4, RT0d; \
|
|
|
+ orl RT0d, val##d;
|
|
|
+
|
|
|
+/*
|
|
|
+ * initial_permutation(left, right): permute one block held as two 32-bit
|
|
|
+ * halves, then widen each half to 64 bits for the table-driven rounds.
|
|
|
+ * Four do_permutation bit swaps are followed by a 1-bit rotate of each half
|
|
|
+ * around a swap under mask 0xaaaaaaaa, and finally expand_to_64bits with
|
|
|
+ * the 0x3f3f3f3f3f3f3f3f mask held in RT3.
|
|
|
+ * Clobbers RT0, RT3 and RW0.
|
|
|
+ */
|
|
|
+#define initial_permutation(left, right) \
|
|
|
+ do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
|
|
|
+ do_permutation(left##d, right##d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(right##d, left##d, 2, 0x33333333); \
|
|
|
+ do_permutation(right##d, left##d, 8, 0x00ff00ff); \
|
|
|
+ movabs $0x3f3f3f3f3f3f3f3f, RT3; \
|
|
|
+ movl left##d, RW0d; \
|
|
|
+ roll $1, right##d; \
|
|
|
+ xorl right##d, RW0d; \
|
|
|
+ andl $0xaaaaaaaa, RW0d; \
|
|
|
+ xorl RW0d, left##d; \
|
|
|
+ xorl RW0d, right##d; \
|
|
|
+ roll $1, left##d; \
|
|
|
+ expand_to_64bits(right, RT3); \
|
|
|
+ expand_to_64bits(left, RT3);
|
|
|
+
|
|
|
+/*
|
|
|
+ * final_permutation(left, right): mirror of initial_permutation.
|
|
|
+ * Both halves are first narrowed with compress_to_64bits, the rotate/swap
|
|
|
+ * under mask 0xaaaaaaaa is redone with right rotates, and the four
|
|
|
+ * do_permutation bit swaps are applied in the reverse order.
|
|
|
+ * Clobbers RT0 and RW0.
|
|
|
+ */
|
|
|
+#define final_permutation(left, right) \
|
|
|
+ compress_to_64bits(right); \
|
|
|
+ compress_to_64bits(left); \
|
|
|
+ movl right##d, RW0d; \
|
|
|
+ rorl $1, left##d; \
|
|
|
+ xorl left##d, RW0d; \
|
|
|
+ andl $0xaaaaaaaa, RW0d; \
|
|
|
+ xorl RW0d, right##d; \
|
|
|
+ xorl RW0d, left##d; \
|
|
|
+ rorl $1, right##d; \
|
|
|
+ do_permutation(right##d, left##d, 8, 0x00ff00ff); \
|
|
|
+ do_permutation(right##d, left##d, 2, 0x33333333); \
|
|
|
+ do_permutation(left##d, right##d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
|
|
|
+
|
|
|
+/*
|
|
|
+ * round1(n, from, to, load_next_key): one round on a single block.
|
|
|
+ *
|
|
|
+ * On entry RW0 holds round key n. It is XORed with `from`, and each of the
|
|
|
+ * eight resulting bytes indexes one table (bytes 0..7 -> s8, s6, s4, s2,
|
|
|
+ * s7, s5, s3, s1); the eight entries are XORed into `to`. Once all bytes
|
|
|
+ * have been extracted, load_next_key(n, RW0) refills RW0 for the next round.
|
|
|
+ * Each byte is expected to be below 64, since the tables have 64 entries:
|
|
|
+ * the data halves are masked with 0x3f3f3f3f3f3f3f3f by initial_permutation;
|
|
|
+ * the round keys presumably satisfy the same bound - confirm in key setup.
|
|
|
+ * Clobbers RW0, RT0-RT3 and RL1 (used here as an extra scratch register).
|
|
|
+ *
|
|
|
+ * The macro ends on its last instruction without a trailing line
|
|
|
+ * continuation, so it does not depend on a blank line following it.
|
|
|
+ */
|
|
|
+#define round1(n, from, to, load_next_key) \
|
|
|
+ xorq from, RW0; \
|
|
|
+ \
|
|
|
+ movzbl RW0bl, RT0d; \
|
|
|
+ movzbl RW0bh, RT1d; \
|
|
|
+ shrq $16, RW0; \
|
|
|
+ movzbl RW0bl, RT2d; \
|
|
|
+ movzbl RW0bh, RT3d; \
|
|
|
+ shrq $16, RW0; \
|
|
|
+ movq s8(, RT0, 8), RT0; \
|
|
|
+ xorq s6(, RT1, 8), to; \
|
|
|
+ movzbl RW0bl, RL1d; \
|
|
|
+ movzbl RW0bh, RT1d; \
|
|
|
+ shrl $16, RW0d; \
|
|
|
+ xorq s4(, RT2, 8), RT0; \
|
|
|
+ xorq s2(, RT3, 8), to; \
|
|
|
+ movzbl RW0bl, RT2d; \
|
|
|
+ movzbl RW0bh, RT3d; \
|
|
|
+ xorq s7(, RL1, 8), RT0; \
|
|
|
+ xorq s5(, RT1, 8), to; \
|
|
|
+ xorq s3(, RT2, 8), RT0; \
|
|
|
+ load_next_key(n, RW0); \
|
|
|
+ xorq RT0, to; \
|
|
|
+ xorq s1(, RT3, 8), to;
|
|
|
+
|
|
|
+/* load_next_key(n, RWx): load round key n+1 (8 bytes per key) from CTX into RWx. */
|
|
|
+#define load_next_key(n, RWx) \
|
|
|
+ movq (((n) + 1) * 8)(CTX), RWx;
|
|
|
+
|
|
|
+/* dummy2(a, b): expands to nothing; passed as the key-load/copy hook on the last round. */
|
|
|
+#define dummy2(a, b) /*_*/
|
|
|
+
|
|
|
+/*
|
|
|
+ * read_block(io, left, right): load the 32-bit words at io and io+4 into
|
|
|
+ * left##d and right##d and byte-swap each of them.
|
|
|
+ */
|
|
|
+#define read_block(io, left, right) \
|
|
|
+ movl (io), left##d; \
|
|
|
+ movl 4(io), right##d; \
|
|
|
+ bswapl left##d; \
|
|
|
+ bswapl right##d;
|
|
|
+
|
|
|
+/*
|
|
|
+ * write_block(io, left, right): byte-swap left##d and right##d in place and
|
|
|
+ * store them to io and io+4. The registers keep the swapped values.
|
|
|
+ */
|
|
|
+#define write_block(io, left, right) \
|
|
|
+ bswapl left##d; \
|
|
|
+ bswapl right##d; \
|
|
|
+ movl left##d, (io); \
|
|
|
+ movl right##d, 4(io);
|
|
|
+
|
|
|
+ENTRY(des3_ede_x86_64_crypt_blk)
|
|
|
+ /* input:
|
|
|
+ * %rdi: round keys, CTX
|
|
|
+ * %rsi: dst
|
|
|
+ * %rdx: src
|
|
|
+ *
|
|
|
+ * Processes one 8-byte block: reads it from src, runs 48 rounds
|
|
|
+ * with round keys 0..47 taken from CTX, and writes 8 bytes to dst.
|
|
|
+ */
|
|
|
+ /* save the callee-saved registers that are used as scratch below */
|
|
|
+ pushq %rbp;
|
|
|
+ pushq %rbx;
|
|
|
+ pushq %r12;
|
|
|
+ pushq %r13;
|
|
|
+ pushq %r14;
|
|
|
+ pushq %r15;
|
|
|
+
|
|
|
+ /* read src first: RT3 is %rdx and initial_permutation overwrites it */
|
|
|
+ read_block(%rdx, RL0, RR0);
|
|
|
+ initial_permutation(RL0, RR0);
|
|
|
+
|
|
|
+ /* preload round key 0; every round loads the key for the next one */
|
|
|
+ movq (CTX), RW0;
|
|
|
+
|
|
|
+ round1(0, RR0, RL0, load_next_key);
|
|
|
+ round1(1, RL0, RR0, load_next_key);
|
|
|
+ round1(2, RR0, RL0, load_next_key);
|
|
|
+ round1(3, RL0, RR0, load_next_key);
|
|
|
+ round1(4, RR0, RL0, load_next_key);
|
|
|
+ round1(5, RL0, RR0, load_next_key);
|
|
|
+ round1(6, RR0, RL0, load_next_key);
|
|
|
+ round1(7, RL0, RR0, load_next_key);
|
|
|
+ round1(8, RR0, RL0, load_next_key);
|
|
|
+ round1(9, RL0, RR0, load_next_key);
|
|
|
+ round1(10, RR0, RL0, load_next_key);
|
|
|
+ round1(11, RL0, RR0, load_next_key);
|
|
|
+ round1(12, RR0, RL0, load_next_key);
|
|
|
+ round1(13, RL0, RR0, load_next_key);
|
|
|
+ round1(14, RR0, RL0, load_next_key);
|
|
|
+ round1(15, RL0, RR0, load_next_key);
|
|
|
+
|
|
|
+ /* rounds 16..31 start with the same from/to order as round 15 */
|
|
|
+ round1(16+0, RL0, RR0, load_next_key);
|
|
|
+ round1(16+1, RR0, RL0, load_next_key);
|
|
|
+ round1(16+2, RL0, RR0, load_next_key);
|
|
|
+ round1(16+3, RR0, RL0, load_next_key);
|
|
|
+ round1(16+4, RL0, RR0, load_next_key);
|
|
|
+ round1(16+5, RR0, RL0, load_next_key);
|
|
|
+ round1(16+6, RL0, RR0, load_next_key);
|
|
|
+ round1(16+7, RR0, RL0, load_next_key);
|
|
|
+ round1(16+8, RL0, RR0, load_next_key);
|
|
|
+ round1(16+9, RR0, RL0, load_next_key);
|
|
|
+ round1(16+10, RL0, RR0, load_next_key);
|
|
|
+ round1(16+11, RR0, RL0, load_next_key);
|
|
|
+ round1(16+12, RL0, RR0, load_next_key);
|
|
|
+ round1(16+13, RR0, RL0, load_next_key);
|
|
|
+ round1(16+14, RL0, RR0, load_next_key);
|
|
|
+ round1(16+15, RR0, RL0, load_next_key);
|
|
|
+
|
|
|
+ /* rounds 32..47 start with the same from/to order as round 31; */
|
|
|
+ /* the last round passes dummy2 because no further key is loaded */
|
|
|
+ round1(32+0, RR0, RL0, load_next_key);
|
|
|
+ round1(32+1, RL0, RR0, load_next_key);
|
|
|
+ round1(32+2, RR0, RL0, load_next_key);
|
|
|
+ round1(32+3, RL0, RR0, load_next_key);
|
|
|
+ round1(32+4, RR0, RL0, load_next_key);
|
|
|
+ round1(32+5, RL0, RR0, load_next_key);
|
|
|
+ round1(32+6, RR0, RL0, load_next_key);
|
|
|
+ round1(32+7, RL0, RR0, load_next_key);
|
|
|
+ round1(32+8, RR0, RL0, load_next_key);
|
|
|
+ round1(32+9, RL0, RR0, load_next_key);
|
|
|
+ round1(32+10, RR0, RL0, load_next_key);
|
|
|
+ round1(32+11, RL0, RR0, load_next_key);
|
|
|
+ round1(32+12, RR0, RL0, load_next_key);
|
|
|
+ round1(32+13, RL0, RR0, load_next_key);
|
|
|
+ round1(32+14, RR0, RL0, load_next_key);
|
|
|
+ round1(32+15, RL0, RR0, dummy2);
|
|
|
+
|
|
|
+ /* output is written with RR0 as the first word and RL0 as the second */
|
|
|
+ final_permutation(RR0, RL0);
|
|
|
+ write_block(%rsi, RR0, RL0);
|
|
|
+
|
|
|
+ /* restore callee-saved registers in reverse push order */
|
|
|
+ popq %r15;
|
|
|
+ popq %r14;
|
|
|
+ popq %r13;
|
|
|
+ popq %r12;
|
|
|
+ popq %rbx;
|
|
|
+ popq %rbp;
|
|
|
+
|
|
|
+ ret;
|
|
|
+ENDPROC(des3_ede_x86_64_crypt_blk)
|
|
|
+
|
|
|
+/***********************************************************************
|
|
|
+ * 3-way 3DES
|
|
|
+ ***********************************************************************/
|
|
|
+/*
|
|
|
+ * expand_to_64bits(val, mask): val = ((ror32(val##d, 4) << 32) | val) & mask.
|
|
|
+ * Clobbers RT0.
|
|
|
+ * NOTE(review): token-for-token repeat of the definition in the 1-way
|
|
|
+ * section of this file; the two copies must be kept identical.
|
|
|
+ */
|
|
|
+#define expand_to_64bits(val, mask) \
|
|
|
+ movl val##d, RT0d; \
|
|
|
+ rorl $4, RT0d; \
|
|
|
+ shlq $32, RT0; \
|
|
|
+ orq RT0, val; \
|
|
|
+ andq mask, val;
|
|
|
+
|
|
|
+/*
|
|
|
+ * compress_to_64bits(val): val##d |= rol32(val >> 32, 4). Clobbers RT0.
|
|
|
+ * NOTE(review): token-for-token repeat of the definition in the 1-way
|
|
|
+ * section of this file; the two copies must be kept identical.
|
|
|
+ */
|
|
|
+#define compress_to_64bits(val) \
|
|
|
+ movq val, RT0; \
|
|
|
+ shrq $32, RT0; \
|
|
|
+ roll $4, RT0d; \
|
|
|
+ orl RT0d, val##d;
|
|
|
+
|
|
|
+/*
|
|
|
+ * initial_permutation3(left, right): three-block form of the initial
|
|
|
+ * permutation. left/right are register-name prefixes; the block index
|
|
|
+ * 0, 1 or 2 (plus the d suffix for 32-bit views) is pasted onto them.
|
|
|
+ * Each block gets the four do_permutation swaps, the rotate/swap under
|
|
|
+ * mask 0xaaaaaaaa, and expand_to_64bits with the mask held in RT3.
|
|
|
+ * Clobbers RT0, RT3 and RW0-RW2.
|
|
|
+ */
|
|
|
+#define initial_permutation3(left, right) \
|
|
|
+ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
|
|
|
+ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
|
|
|
+ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
|
|
|
+ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
|
|
|
+ \
|
|
|
+ do_permutation(right##0d, left##0d, 2, 0x33333333); \
|
|
|
+ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
|
|
|
+ do_permutation(right##1d, left##1d, 2, 0x33333333); \
|
|
|
+ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
|
|
|
+ do_permutation(right##2d, left##2d, 2, 0x33333333); \
|
|
|
+ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
|
|
|
+ \
|
|
|
+ movabs $0x3f3f3f3f3f3f3f3f, RT3; \
|
|
|
+ \
|
|
|
+ movl left##0d, RW0d; \
|
|
|
+ roll $1, right##0d; \
|
|
|
+ xorl right##0d, RW0d; \
|
|
|
+ andl $0xaaaaaaaa, RW0d; \
|
|
|
+ xorl RW0d, left##0d; \
|
|
|
+ xorl RW0d, right##0d; \
|
|
|
+ roll $1, left##0d; \
|
|
|
+ expand_to_64bits(right##0, RT3); \
|
|
|
+ expand_to_64bits(left##0, RT3); \
|
|
|
+ movl left##1d, RW1d; \
|
|
|
+ roll $1, right##1d; \
|
|
|
+ xorl right##1d, RW1d; \
|
|
|
+ andl $0xaaaaaaaa, RW1d; \
|
|
|
+ xorl RW1d, left##1d; \
|
|
|
+ xorl RW1d, right##1d; \
|
|
|
+ roll $1, left##1d; \
|
|
|
+ expand_to_64bits(right##1, RT3); \
|
|
|
+ expand_to_64bits(left##1, RT3); \
|
|
|
+ movl left##2d, RW2d; \
|
|
|
+ roll $1, right##2d; \
|
|
|
+ xorl right##2d, RW2d; \
|
|
|
+ andl $0xaaaaaaaa, RW2d; \
|
|
|
+ xorl RW2d, left##2d; \
|
|
|
+ xorl RW2d, right##2d; \
|
|
|
+ roll $1, left##2d; \
|
|
|
+ expand_to_64bits(right##2, RT3); \
|
|
|
+ expand_to_64bits(left##2, RT3);
|
|
|
+
|
|
|
+/*
|
|
|
+ * final_permutation3(left, right): three-block form of the final
|
|
|
+ * permutation. left/right are register-name prefixes; the block index
|
|
|
+ * 0, 1 or 2 (plus the d suffix for 32-bit views) is pasted onto them.
|
|
|
+ * Each block is narrowed with compress_to_64bits, gets the rotate/swap
|
|
|
+ * under mask 0xaaaaaaaa with right rotates, then the four do_permutation
|
|
|
+ * swaps in the reverse of the initial order.
|
|
|
+ * Clobbers RT0 and RW0-RW2.
|
|
|
+ */
|
|
|
+#define final_permutation3(left, right) \
|
|
|
+ compress_to_64bits(right##0); \
|
|
|
+ compress_to_64bits(left##0); \
|
|
|
+ movl right##0d, RW0d; \
|
|
|
+ rorl $1, left##0d; \
|
|
|
+ xorl left##0d, RW0d; \
|
|
|
+ andl $0xaaaaaaaa, RW0d; \
|
|
|
+ xorl RW0d, right##0d; \
|
|
|
+ xorl RW0d, left##0d; \
|
|
|
+ rorl $1, right##0d; \
|
|
|
+ compress_to_64bits(right##1); \
|
|
|
+ compress_to_64bits(left##1); \
|
|
|
+ movl right##1d, RW1d; \
|
|
|
+ rorl $1, left##1d; \
|
|
|
+ xorl left##1d, RW1d; \
|
|
|
+ andl $0xaaaaaaaa, RW1d; \
|
|
|
+ xorl RW1d, right##1d; \
|
|
|
+ xorl RW1d, left##1d; \
|
|
|
+ rorl $1, right##1d; \
|
|
|
+ compress_to_64bits(right##2); \
|
|
|
+ compress_to_64bits(left##2); \
|
|
|
+ movl right##2d, RW2d; \
|
|
|
+ rorl $1, left##2d; \
|
|
|
+ xorl left##2d, RW2d; \
|
|
|
+ andl $0xaaaaaaaa, RW2d; \
|
|
|
+ xorl RW2d, right##2d; \
|
|
|
+ xorl RW2d, left##2d; \
|
|
|
+ rorl $1, right##2d; \
|
|
|
+ \
|
|
|
+ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
|
|
|
+ do_permutation(right##0d, left##0d, 2, 0x33333333); \
|
|
|
+ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
|
|
|
+ do_permutation(right##1d, left##1d, 2, 0x33333333); \
|
|
|
+ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
|
|
|
+ do_permutation(right##2d, left##2d, 2, 0x33333333); \
|
|
|
+ \
|
|
|
+ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
|
|
|
+ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
|
|
|
+ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
|
|
|
+ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
|
|
|
+
|
|
|
+/*
|
|
|
+ * round3(n, from, to, load_next_key, do_movq): one round on three blocks.
|
|
|
+ *
|
|
|
+ * from/to are register-name prefixes; the block index 0, 1 or 2 is pasted
|
|
|
+ * onto them. On entry RW0, RW1 and RW2 each hold round key n. For block i,
|
|
|
+ * RWi is XORed with from##i and its eight bytes index the tables (bytes
|
|
|
+ * 0..7 -> s8, s6, s4, s2, s7, s5, s3, s1), the entries being XORed into
|
|
|
+ * to##i. RT3 and RT1 carry the even and odd byte indices respectively.
|
|
|
+ * load_next_key(n, RW0) refills RW0 once block 0's bytes are extracted;
|
|
|
+ * do_movq(RW0, RW1) and do_movq(RW0, RW2) copy that key into RW1 and RW2
|
|
|
+ * once blocks 1 and 2 have extracted theirs.
|
|
|
+ * Clobbers RW0-RW2, RT1 and RT3.
|
|
|
+ */
|
|
|
+#define round3(n, from, to, load_next_key, do_movq) \
|
|
|
+ xorq from##0, RW0; \
|
|
|
+ movzbl RW0bl, RT3d; \
|
|
|
+ movzbl RW0bh, RT1d; \
|
|
|
+ shrq $16, RW0; \
|
|
|
+ xorq s8(, RT3, 8), to##0; \
|
|
|
+ xorq s6(, RT1, 8), to##0; \
|
|
|
+ movzbl RW0bl, RT3d; \
|
|
|
+ movzbl RW0bh, RT1d; \
|
|
|
+ shrq $16, RW0; \
|
|
|
+ xorq s4(, RT3, 8), to##0; \
|
|
|
+ xorq s2(, RT1, 8), to##0; \
|
|
|
+ movzbl RW0bl, RT3d; \
|
|
|
+ movzbl RW0bh, RT1d; \
|
|
|
+ shrl $16, RW0d; \
|
|
|
+ xorq s7(, RT3, 8), to##0; \
|
|
|
+ xorq s5(, RT1, 8), to##0; \
|
|
|
+ movzbl RW0bl, RT3d; \
|
|
|
+ movzbl RW0bh, RT1d; \
|
|
|
+ load_next_key(n, RW0); \
|
|
|
+ xorq s3(, RT3, 8), to##0; \
|
|
|
+ xorq s1(, RT1, 8), to##0; \
|
|
|
+ xorq from##1, RW1; \
|
|
|
+ movzbl RW1bl, RT3d; \
|
|
|
+ movzbl RW1bh, RT1d; \
|
|
|
+ shrq $16, RW1; \
|
|
|
+ xorq s8(, RT3, 8), to##1; \
|
|
|
+ xorq s6(, RT1, 8), to##1; \
|
|
|
+ movzbl RW1bl, RT3d; \
|
|
|
+ movzbl RW1bh, RT1d; \
|
|
|
+ shrq $16, RW1; \
|
|
|
+ xorq s4(, RT3, 8), to##1; \
|
|
|
+ xorq s2(, RT1, 8), to##1; \
|
|
|
+ movzbl RW1bl, RT3d; \
|
|
|
+ movzbl RW1bh, RT1d; \
|
|
|
+ shrl $16, RW1d; \
|
|
|
+ xorq s7(, RT3, 8), to##1; \
|
|
|
+ xorq s5(, RT1, 8), to##1; \
|
|
|
+ movzbl RW1bl, RT3d; \
|
|
|
+ movzbl RW1bh, RT1d; \
|
|
|
+ do_movq(RW0, RW1); \
|
|
|
+ xorq s3(, RT3, 8), to##1; \
|
|
|
+ xorq s1(, RT1, 8), to##1; \
|
|
|
+ xorq from##2, RW2; \
|
|
|
+ movzbl RW2bl, RT3d; \
|
|
|
+ movzbl RW2bh, RT1d; \
|
|
|
+ shrq $16, RW2; \
|
|
|
+ xorq s8(, RT3, 8), to##2; \
|
|
|
+ xorq s6(, RT1, 8), to##2; \
|
|
|
+ movzbl RW2bl, RT3d; \
|
|
|
+ movzbl RW2bh, RT1d; \
|
|
|
+ shrq $16, RW2; \
|
|
|
+ xorq s4(, RT3, 8), to##2; \
|
|
|
+ xorq s2(, RT1, 8), to##2; \
|
|
|
+ movzbl RW2bl, RT3d; \
|
|
|
+ movzbl RW2bh, RT1d; \
|
|
|
+ shrl $16, RW2d; \
|
|
|
+ xorq s7(, RT3, 8), to##2; \
|
|
|
+ xorq s5(, RT1, 8), to##2; \
|
|
|
+ movzbl RW2bl, RT3d; \
|
|
|
+ movzbl RW2bh, RT1d; \
|
|
|
+ do_movq(RW0, RW2); \
|
|
|
+ xorq s3(, RT3, 8), to##2; \
|
|
|
+ xorq s1(, RT1, 8), to##2;
|
|
|
+
|
|
|
+/* __movq(src, dst): plain 64-bit move; passed to round3 as its do_movq hook. */
|
|
|
+#define __movq(src, dst) \
|
|
|
+ movq src, dst;
|
|
|
+
|
|
|
+ENTRY(des3_ede_x86_64_crypt_blk_3way)
|
|
|
+ /* input:
|
|
|
+ * %rdi: ctx, round keys
|
|
|
+ * %rsi: dst (3 blocks)
|
|
|
+ * %rdx: src (3 blocks)
|
|
|
+ *
|
|
|
+ * Processes three consecutive 8-byte blocks (24 bytes) at once:
|
|
|
+ * 48 rounds with round keys 0..47 from CTX are applied to each.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* save the callee-saved registers that are used below */
|
|
|
+ pushq %rbp;
|
|
|
+ pushq %rbx;
|
|
|
+ pushq %r12;
|
|
|
+ pushq %r13;
|
|
|
+ pushq %r14;
|
|
|
+ pushq %r15;
|
|
|
+
|
|
|
+ /* load input (before RT3, which is %rdx, gets overwritten) */
|
|
|
+ movl 0 * 4(%rdx), RL0d;
|
|
|
+ movl 1 * 4(%rdx), RR0d;
|
|
|
+ movl 2 * 4(%rdx), RL1d;
|
|
|
+ movl 3 * 4(%rdx), RR1d;
|
|
|
+ movl 4 * 4(%rdx), RL2d;
|
|
|
+ movl 5 * 4(%rdx), RR2d;
|
|
|
+
|
|
|
+ /* byte-swap each loaded 32-bit word */
|
|
|
+ bswapl RL0d;
|
|
|
+ bswapl RR0d;
|
|
|
+ bswapl RL1d;
|
|
|
+ bswapl RR1d;
|
|
|
+ bswapl RL2d;
|
|
|
+ bswapl RR2d;
|
|
|
+
|
|
|
+ initial_permutation3(RL, RR);
|
|
|
+
|
|
|
+ /* preload round key 0 into the working register of every block */
|
|
|
+ movq 0(CTX), RW0;
|
|
|
+ movq RW0, RW1;
|
|
|
+ movq RW0, RW2;
|
|
|
+
|
|
|
+ round3(0, RR, RL, load_next_key, __movq);
|
|
|
+ round3(1, RL, RR, load_next_key, __movq);
|
|
|
+ round3(2, RR, RL, load_next_key, __movq);
|
|
|
+ round3(3, RL, RR, load_next_key, __movq);
|
|
|
+ round3(4, RR, RL, load_next_key, __movq);
|
|
|
+ round3(5, RL, RR, load_next_key, __movq);
|
|
|
+ round3(6, RR, RL, load_next_key, __movq);
|
|
|
+ round3(7, RL, RR, load_next_key, __movq);
|
|
|
+ round3(8, RR, RL, load_next_key, __movq);
|
|
|
+ round3(9, RL, RR, load_next_key, __movq);
|
|
|
+ round3(10, RR, RL, load_next_key, __movq);
|
|
|
+ round3(11, RL, RR, load_next_key, __movq);
|
|
|
+ round3(12, RR, RL, load_next_key, __movq);
|
|
|
+ round3(13, RL, RR, load_next_key, __movq);
|
|
|
+ round3(14, RR, RL, load_next_key, __movq);
|
|
|
+ round3(15, RL, RR, load_next_key, __movq);
|
|
|
+
|
|
|
+ /* rounds 16..31 start with the same from/to order as round 15 */
|
|
|
+ round3(16+0, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+1, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+2, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+3, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+4, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+5, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+6, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+7, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+8, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+9, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+10, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+11, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+12, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+13, RR, RL, load_next_key, __movq);
|
|
|
+ round3(16+14, RL, RR, load_next_key, __movq);
|
|
|
+ round3(16+15, RR, RL, load_next_key, __movq);
|
|
|
+
|
|
|
+ /* rounds 32..47 start with the same from/to order as round 31; */
|
|
|
+ /* the last round passes dummy2 twice: no further key load or copy */
|
|
|
+ round3(32+0, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+1, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+2, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+3, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+4, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+5, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+6, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+7, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+8, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+9, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+10, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+11, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+12, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+13, RL, RR, load_next_key, __movq);
|
|
|
+ round3(32+14, RR, RL, load_next_key, __movq);
|
|
|
+ round3(32+15, RL, RR, dummy2, dummy2);
|
|
|
+
|
|
|
+ final_permutation3(RR, RL);
|
|
|
+
|
|
|
+ /* byte-swap back; each block is stored as its RR word, then its RL word */
|
|
|
+ bswapl RR0d;
|
|
|
+ bswapl RL0d;
|
|
|
+ bswapl RR1d;
|
|
|
+ bswapl RL1d;
|
|
|
+ bswapl RR2d;
|
|
|
+ bswapl RL2d;
|
|
|
+
|
|
|
+ movl RR0d, 0 * 4(%rsi);
|
|
|
+ movl RL0d, 1 * 4(%rsi);
|
|
|
+ movl RR1d, 2 * 4(%rsi);
|
|
|
+ movl RL1d, 3 * 4(%rsi);
|
|
|
+ movl RR2d, 4 * 4(%rsi);
|
|
|
+ movl RL2d, 5 * 4(%rsi);
|
|
|
+
|
|
|
+ /* restore callee-saved registers in reverse push order */
|
|
|
+ popq %r15;
|
|
|
+ popq %r14;
|
|
|
+ popq %r13;
|
|
|
+ popq %r12;
|
|
|
+ popq %rbx;
|
|
|
+ popq %rbp;
|
|
|
+
|
|
|
+ ret;
|
|
|
+ENDPROC(des3_ede_x86_64_crypt_blk_3way)
|
|
|
+
|
|
|
+/*
|
|
|
+ * Lookup tables used by round1/round3. The code in this file only ever
|
|
|
+ * reads them, so they are placed in read-only data rather than in the
|
|
|
+ * writable .data section. 16-byte alignment is kept as before.
|
|
|
+ */
|
|
|
+.section .rodata, "a", @progbits
|
|
|
+.align 16
|
|
|
+.L_s1:
|
|
|
+ .quad 0x0010100001010400, 0x0000000000000000
|
|
|
+ .quad 0x0000100000010000, 0x0010100001010404
|
|
|
+ .quad 0x0010100001010004, 0x0000100000010404
|
|
|
+ .quad 0x0000000000000004, 0x0000100000010000
|
|
|
+ .quad 0x0000000000000400, 0x0010100001010400
|
|
|
+ .quad 0x0010100001010404, 0x0000000000000400
|
|
|
+ .quad 0x0010000001000404, 0x0010100001010004
|
|
|
+ .quad 0x0010000001000000, 0x0000000000000004
|
|
|
+ .quad 0x0000000000000404, 0x0010000001000400
|
|
|
+ .quad 0x0010000001000400, 0x0000100000010400
|
|
|
+ .quad 0x0000100000010400, 0x0010100001010000
|
|
|
+ .quad 0x0010100001010000, 0x0010000001000404
|
|
|
+ .quad 0x0000100000010004, 0x0010000001000004
|
|
|
+ .quad 0x0010000001000004, 0x0000100000010004
|
|
|
+ .quad 0x0000000000000000, 0x0000000000000404
|
|
|
+ .quad 0x0000100000010404, 0x0010000001000000
|
|
|
+ .quad 0x0000100000010000, 0x0010100001010404
|
|
|
+ .quad 0x0000000000000004, 0x0010100001010000
|
|
|
+ .quad 0x0010100001010400, 0x0010000001000000
|
|
|
+ .quad 0x0010000001000000, 0x0000000000000400
|
|
|
+ .quad 0x0010100001010004, 0x0000100000010000
|
|
|
+ .quad 0x0000100000010400, 0x0010000001000004
|
|
|
+ .quad 0x0000000000000400, 0x0000000000000004
|
|
|
+ .quad 0x0010000001000404, 0x0000100000010404
|
|
|
+ .quad 0x0010100001010404, 0x0000100000010004
|
|
|
+ .quad 0x0010100001010000, 0x0010000001000404
|
|
|
+ .quad 0x0010000001000004, 0x0000000000000404
|
|
|
+ .quad 0x0000100000010404, 0x0010100001010400
|
|
|
+ .quad 0x0000000000000404, 0x0010000001000400
|
|
|
+ .quad 0x0010000001000400, 0x0000000000000000
|
|
|
+ .quad 0x0000100000010004, 0x0000100000010400
|
|
|
+ .quad 0x0000000000000000, 0x0010100001010004
|
|
|
+.L_s2:
|
|
|
+ .quad 0x0801080200100020, 0x0800080000000000
|
|
|
+ .quad 0x0000080000000000, 0x0001080200100020
|
|
|
+ .quad 0x0001000000100000, 0x0000000200000020
|
|
|
+ .quad 0x0801000200100020, 0x0800080200000020
|
|
|
+ .quad 0x0800000200000020, 0x0801080200100020
|
|
|
+ .quad 0x0801080000100000, 0x0800000000000000
|
|
|
+ .quad 0x0800080000000000, 0x0001000000100000
|
|
|
+ .quad 0x0000000200000020, 0x0801000200100020
|
|
|
+ .quad 0x0001080000100000, 0x0001000200100020
|
|
|
+ .quad 0x0800080200000020, 0x0000000000000000
|
|
|
+ .quad 0x0800000000000000, 0x0000080000000000
|
|
|
+ .quad 0x0001080200100020, 0x0801000000100000
|
|
|
+ .quad 0x0001000200100020, 0x0800000200000020
|
|
|
+ .quad 0x0000000000000000, 0x0001080000100000
|
|
|
+ .quad 0x0000080200000020, 0x0801080000100000
|
|
|
+ .quad 0x0801000000100000, 0x0000080200000020
|
|
|
+ .quad 0x0000000000000000, 0x0001080200100020
|
|
|
+ .quad 0x0801000200100020, 0x0001000000100000
|
|
|
+ .quad 0x0800080200000020, 0x0801000000100000
|
|
|
+ .quad 0x0801080000100000, 0x0000080000000000
|
|
|
+ .quad 0x0801000000100000, 0x0800080000000000
|
|
|
+ .quad 0x0000000200000020, 0x0801080200100020
|
|
|
+ .quad 0x0001080200100020, 0x0000000200000020
|
|
|
+ .quad 0x0000080000000000, 0x0800000000000000
|
|
|
+ .quad 0x0000080200000020, 0x0801080000100000
|
|
|
+ .quad 0x0001000000100000, 0x0800000200000020
|
|
|
+ .quad 0x0001000200100020, 0x0800080200000020
|
|
|
+ .quad 0x0800000200000020, 0x0001000200100020
|
|
|
+ .quad 0x0001080000100000, 0x0000000000000000
|
|
|
+ .quad 0x0800080000000000, 0x0000080200000020
|
|
|
+ .quad 0x0800000000000000, 0x0801000200100020
|
|
|
+ .quad 0x0801080200100020, 0x0001080000100000
|
|
|
+.L_s3:
|
|
|
+ .quad 0x0000002000000208, 0x0000202008020200
|
|
|
+ .quad 0x0000000000000000, 0x0000200008020008
|
|
|
+ .quad 0x0000002008000200, 0x0000000000000000
|
|
|
+ .quad 0x0000202000020208, 0x0000002008000200
|
|
|
+ .quad 0x0000200000020008, 0x0000000008000008
|
|
|
+ .quad 0x0000000008000008, 0x0000200000020000
|
|
|
+ .quad 0x0000202008020208, 0x0000200000020008
|
|
|
+ .quad 0x0000200008020000, 0x0000002000000208
|
|
|
+ .quad 0x0000000008000000, 0x0000000000000008
|
|
|
+ .quad 0x0000202008020200, 0x0000002000000200
|
|
|
+ .quad 0x0000202000020200, 0x0000200008020000
|
|
|
+ .quad 0x0000200008020008, 0x0000202000020208
|
|
|
+ .quad 0x0000002008000208, 0x0000202000020200
|
|
|
+ .quad 0x0000200000020000, 0x0000002008000208
|
|
|
+ .quad 0x0000000000000008, 0x0000202008020208
|
|
|
+ .quad 0x0000002000000200, 0x0000000008000000
|
|
|
+ .quad 0x0000202008020200, 0x0000000008000000
|
|
|
+ .quad 0x0000200000020008, 0x0000002000000208
|
|
|
+ .quad 0x0000200000020000, 0x0000202008020200
|
|
|
+ .quad 0x0000002008000200, 0x0000000000000000
|
|
|
+ .quad 0x0000002000000200, 0x0000200000020008
|
|
|
+ .quad 0x0000202008020208, 0x0000002008000200
|
|
|
+ .quad 0x0000000008000008, 0x0000002000000200
|
|
|
+ .quad 0x0000000000000000, 0x0000200008020008
|
|
|
+ .quad 0x0000002008000208, 0x0000200000020000
|
|
|
+ .quad 0x0000000008000000, 0x0000202008020208
|
|
|
+ .quad 0x0000000000000008, 0x0000202000020208
|
|
|
+ .quad 0x0000202000020200, 0x0000000008000008
|
|
|
+ .quad 0x0000200008020000, 0x0000002008000208
|
|
|
+ .quad 0x0000002000000208, 0x0000200008020000
|
|
|
+ .quad 0x0000202000020208, 0x0000000000000008
|
|
|
+ .quad 0x0000200008020008, 0x0000202000020200
|
|
|
+.L_s4:
|
|
|
+ .quad 0x1008020000002001, 0x1000020800002001
|
|
|
+ .quad 0x1000020800002001, 0x0000000800000000
|
|
|
+ .quad 0x0008020800002000, 0x1008000800000001
|
|
|
+ .quad 0x1008000000000001, 0x1000020000002001
|
|
|
+ .quad 0x0000000000000000, 0x0008020000002000
|
|
|
+ .quad 0x0008020000002000, 0x1008020800002001
|
|
|
+ .quad 0x1000000800000001, 0x0000000000000000
|
|
|
+ .quad 0x0008000800000000, 0x1008000000000001
|
|
|
+ .quad 0x1000000000000001, 0x0000020000002000
|
|
|
+ .quad 0x0008000000000000, 0x1008020000002001
|
|
|
+ .quad 0x0000000800000000, 0x0008000000000000
|
|
|
+ .quad 0x1000020000002001, 0x0000020800002000
|
|
|
+ .quad 0x1008000800000001, 0x1000000000000001
|
|
|
+ .quad 0x0000020800002000, 0x0008000800000000
|
|
|
+ .quad 0x0000020000002000, 0x0008020800002000
|
|
|
+ .quad 0x1008020800002001, 0x1000000800000001
|
|
|
+ .quad 0x0008000800000000, 0x1008000000000001
|
|
|
+ .quad 0x0008020000002000, 0x1008020800002001
|
|
|
+ .quad 0x1000000800000001, 0x0000000000000000
|
|
|
+ .quad 0x0000000000000000, 0x0008020000002000
|
|
|
+ .quad 0x0000020800002000, 0x0008000800000000
|
|
|
+ .quad 0x1008000800000001, 0x1000000000000001
|
|
|
+ .quad 0x1008020000002001, 0x1000020800002001
|
|
|
+ .quad 0x1000020800002001, 0x0000000800000000
|
|
|
+ .quad 0x1008020800002001, 0x1000000800000001
|
|
|
+ .quad 0x1000000000000001, 0x0000020000002000
|
|
|
+ .quad 0x1008000000000001, 0x1000020000002001
|
|
|
+ .quad 0x0008020800002000, 0x1008000800000001
|
|
|
+ .quad 0x1000020000002001, 0x0000020800002000
|
|
|
+ .quad 0x0008000000000000, 0x1008020000002001
|
|
|
+ .quad 0x0000000800000000, 0x0008000000000000
|
|
|
+ .quad 0x0000020000002000, 0x0008020800002000
|
|
|
+.L_s5:
|
|
|
+ .quad 0x0000001000000100, 0x0020001002080100
|
|
|
+ .quad 0x0020000002080000, 0x0420001002000100
|
|
|
+ .quad 0x0000000000080000, 0x0000001000000100
|
|
|
+ .quad 0x0400000000000000, 0x0020000002080000
|
|
|
+ .quad 0x0400001000080100, 0x0000000000080000
|
|
|
+ .quad 0x0020001002000100, 0x0400001000080100
|
|
|
+ .quad 0x0420001002000100, 0x0420000002080000
|
|
|
+ .quad 0x0000001000080100, 0x0400000000000000
|
|
|
+ .quad 0x0020000002000000, 0x0400000000080000
|
|
|
+ .quad 0x0400000000080000, 0x0000000000000000
|
|
|
+ .quad 0x0400001000000100, 0x0420001002080100
|
|
|
+ .quad 0x0420001002080100, 0x0020001002000100
|
|
|
+ .quad 0x0420000002080000, 0x0400001000000100
|
|
|
+ .quad 0x0000000000000000, 0x0420000002000000
|
|
|
+ .quad 0x0020001002080100, 0x0020000002000000
|
|
|
+ .quad 0x0420000002000000, 0x0000001000080100
|
|
|
+ .quad 0x0000000000080000, 0x0420001002000100
|
|
|
+ .quad 0x0000001000000100, 0x0020000002000000
|
|
|
+ .quad 0x0400000000000000, 0x0020000002080000
|
|
|
+ .quad 0x0420001002000100, 0x0400001000080100
|
|
|
+ .quad 0x0020001002000100, 0x0400000000000000
|
|
|
+ .quad 0x0420000002080000, 0x0020001002080100
|
|
|
+ .quad 0x0400001000080100, 0x0000001000000100
|
|
|
+ .quad 0x0020000002000000, 0x0420000002080000
|
|
|
+ .quad 0x0420001002080100, 0x0000001000080100
|
|
|
+ .quad 0x0420000002000000, 0x0420001002080100
|
|
|
+ .quad 0x0020000002080000, 0x0000000000000000
|
|
|
+ .quad 0x0400000000080000, 0x0420000002000000
|
|
|
+ .quad 0x0000001000080100, 0x0020001002000100
|
|
|
+ .quad 0x0400001000000100, 0x0000000000080000
|
|
|
+ .quad 0x0000000000000000, 0x0400000000080000
|
|
|
+ .quad 0x0020001002080100, 0x0400001000000100
|
|
|
+.L_s6:
|
|
|
+ .quad 0x0200000120000010, 0x0204000020000000
|
|
|
+ .quad 0x0000040000000000, 0x0204040120000010
|
|
|
+ .quad 0x0204000020000000, 0x0000000100000010
|
|
|
+ .quad 0x0204040120000010, 0x0004000000000000
|
|
|
+ .quad 0x0200040020000000, 0x0004040100000010
|
|
|
+ .quad 0x0004000000000000, 0x0200000120000010
|
|
|
+ .quad 0x0004000100000010, 0x0200040020000000
|
|
|
+ .quad 0x0200000020000000, 0x0000040100000010
|
|
|
+ .quad 0x0000000000000000, 0x0004000100000010
|
|
|
+ .quad 0x0200040120000010, 0x0000040000000000
|
|
|
+ .quad 0x0004040000000000, 0x0200040120000010
|
|
|
+ .quad 0x0000000100000010, 0x0204000120000010
|
|
|
+ .quad 0x0204000120000010, 0x0000000000000000
|
|
|
+ .quad 0x0004040100000010, 0x0204040020000000
|
|
|
+ .quad 0x0000040100000010, 0x0004040000000000
|
|
|
+ .quad 0x0204040020000000, 0x0200000020000000
|
|
|
+ .quad 0x0200040020000000, 0x0000000100000010
|
|
|
+ .quad 0x0204000120000010, 0x0004040000000000
|
|
|
+ .quad 0x0204040120000010, 0x0004000000000000
|
|
|
+ .quad 0x0000040100000010, 0x0200000120000010
|
|
|
+ .quad 0x0004000000000000, 0x0200040020000000
|
|
|
+ .quad 0x0200000020000000, 0x0000040100000010
|
|
|
+ .quad 0x0200000120000010, 0x0204040120000010
|
|
|
+ .quad 0x0004040000000000, 0x0204000020000000
|
|
|
+ .quad 0x0004040100000010, 0x0204040020000000
|
|
|
+ .quad 0x0000000000000000, 0x0204000120000010
|
|
|
+ .quad 0x0000000100000010, 0x0000040000000000
|
|
|
+ .quad 0x0204000020000000, 0x0004040100000010
|
|
|
+ .quad 0x0000040000000000, 0x0004000100000010
|
|
|
+ .quad 0x0200040120000010, 0x0000000000000000
|
|
|
+ .quad 0x0204040020000000, 0x0200000020000000
|
|
|
+ .quad 0x0004000100000010, 0x0200040120000010
|
|
|
+.L_s7:
|
|
|
+ .quad 0x0002000000200000, 0x2002000004200002
|
|
|
+ .quad 0x2000000004000802, 0x0000000000000000
|
|
|
+ .quad 0x0000000000000800, 0x2000000004000802
|
|
|
+ .quad 0x2002000000200802, 0x0002000004200800
|
|
|
+ .quad 0x2002000004200802, 0x0002000000200000
|
|
|
+ .quad 0x0000000000000000, 0x2000000004000002
|
|
|
+ .quad 0x2000000000000002, 0x0000000004000000
|
|
|
+ .quad 0x2002000004200002, 0x2000000000000802
|
|
|
+ .quad 0x0000000004000800, 0x2002000000200802
|
|
|
+ .quad 0x2002000000200002, 0x0000000004000800
|
|
|
+ .quad 0x2000000004000002, 0x0002000004200000
|
|
|
+ .quad 0x0002000004200800, 0x2002000000200002
|
|
|
+ .quad 0x0002000004200000, 0x0000000000000800
|
|
|
+ .quad 0x2000000000000802, 0x2002000004200802
|
|
|
+ .quad 0x0002000000200800, 0x2000000000000002
|
|
|
+ .quad 0x0000000004000000, 0x0002000000200800
|
|
|
+ .quad 0x0000000004000000, 0x0002000000200800
|
|
|
+ .quad 0x0002000000200000, 0x2000000004000802
|
|
|
+ .quad 0x2000000004000802, 0x2002000004200002
|
|
|
+ .quad 0x2002000004200002, 0x2000000000000002
|
|
|
+ .quad 0x2002000000200002, 0x0000000004000000
|
|
|
+ .quad 0x0000000004000800, 0x0002000000200000
|
|
|
+ .quad 0x0002000004200800, 0x2000000000000802
|
|
|
+ .quad 0x2002000000200802, 0x0002000004200800
|
|
|
+ .quad 0x2000000000000802, 0x2000000004000002
|
|
|
+ .quad 0x2002000004200802, 0x0002000004200000
|
|
|
+ .quad 0x0002000000200800, 0x0000000000000000
|
|
|
+ .quad 0x2000000000000002, 0x2002000004200802
|
|
|
+ .quad 0x0000000000000000, 0x2002000000200802
|
|
|
+ .quad 0x0002000004200000, 0x0000000000000800
|
|
|
+ .quad 0x2000000004000002, 0x0000000004000800
|
|
|
+ .quad 0x0000000000000800, 0x2002000000200002
|
|
|
+.L_s8:
|
|
|
+ .quad 0x0100010410001000, 0x0000010000001000
|
|
|
+ .quad 0x0000000000040000, 0x0100010410041000
|
|
|
+ .quad 0x0100000010000000, 0x0100010410001000
|
|
|
+ .quad 0x0000000400000000, 0x0100000010000000
|
|
|
+ .quad 0x0000000400040000, 0x0100000010040000
|
|
|
+ .quad 0x0100010410041000, 0x0000010000041000
|
|
|
+ .quad 0x0100010010041000, 0x0000010400041000
|
|
|
+ .quad 0x0000010000001000, 0x0000000400000000
|
|
|
+ .quad 0x0100000010040000, 0x0100000410000000
|
|
|
+ .quad 0x0100010010001000, 0x0000010400001000
|
|
|
+ .quad 0x0000010000041000, 0x0000000400040000
|
|
|
+ .quad 0x0100000410040000, 0x0100010010041000
|
|
|
+ .quad 0x0000010400001000, 0x0000000000000000
|
|
|
+ .quad 0x0000000000000000, 0x0100000410040000
|
|
|
+ .quad 0x0100000410000000, 0x0100010010001000
|
|
|
+ .quad 0x0000010400041000, 0x0000000000040000
|
|
|
+ .quad 0x0000010400041000, 0x0000000000040000
|
|
|
+ .quad 0x0100010010041000, 0x0000010000001000
|
|
|
+ .quad 0x0000000400000000, 0x0100000410040000
|
|
|
+ .quad 0x0000010000001000, 0x0000010400041000
|
|
|
+ .quad 0x0100010010001000, 0x0000000400000000
|
|
|
+ .quad 0x0100000410000000, 0x0100000010040000
|
|
|
+ .quad 0x0100000410040000, 0x0100000010000000
|
|
|
+ .quad 0x0000000000040000, 0x0100010410001000
|
|
|
+ .quad 0x0000000000000000, 0x0100010410041000
|
|
|
+ .quad 0x0000000400040000, 0x0100000410000000
|
|
|
+ .quad 0x0100000010040000, 0x0100010010001000
|
|
|
+ .quad 0x0100010410001000, 0x0000000000000000
|
|
|
+ .quad 0x0100010410041000, 0x0000010000041000
|
|
|
+ .quad 0x0000010000041000, 0x0000010400001000
|
|
|
+ .quad 0x0000010400001000, 0x0000000400040000
|
|
|
+ .quad 0x0100000010000000, 0x0100010010041000
|