@@ -0,0 +1,546 @@
+/*
+ * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
+ *
+ * This is an AES128/192/256 CTR mode optimization implementation. It requires
+ * the support of Intel(R) AESNI and AVX instructions.
+ *
+ * This work was inspired by the AES CTR mode optimization published
+ * in the Intel Optimized IPSEC Cryptographic library.
+ * Additional information on it can be found at:
+ * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * James Guilford <james.guilford@intel.com>
+ * Sean Gulley <sean.m.gulley@intel.com>
+ * Chandramouli Narayanan <mouli@linux.intel.com>
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/inst.h>
+
+#define CONCAT(a,b)	a##b
+#define VMOVDQ		vmovdqu
+
+#define xdata0		%xmm0
+#define xdata1		%xmm1
+#define xdata2		%xmm2
+#define xdata3		%xmm3
+#define xdata4		%xmm4
+#define xdata5		%xmm5
+#define xdata6		%xmm6
+#define xdata7		%xmm7
+#define xcounter	%xmm8
+#define xbyteswap	%xmm9
+#define xkey0		%xmm10
+#define xkey3		%xmm11
+#define xkey6		%xmm12
+#define xkey9		%xmm13
+#define xkey4		%xmm11
+#define xkey8		%xmm12
+#define xkey12		%xmm13
+#define xkeyA		%xmm14
+#define xkeyB		%xmm15
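+
+/*
+ * Note that the xkey aliases deliberately overlap: xkey3/xkey4 share
+ * %xmm11, xkey6/xkey8 share %xmm12 and xkey9/xkey12 share %xmm13.
+ * Which alias is live depends on the key length: AES-128 caches round
+ * keys 0/3/6/9 in registers, while AES-192/256 cache round keys
+ * 0/4/8/12 (see .Lmult_of_8_blks below).
+ */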
+
+#define p_in		%rdi
+#define p_iv		%rsi
+#define p_keys		%rdx
+#define p_out		%rcx
+#define num_bytes	%r8
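+
+/*
+ * The parameter registers follow the x86_64 System V ABI: %rdi, %rsi,
+ * %rdx, %rcx and %r8 carry the five arguments of
+ * aes_ctr_enc_*_avx_by8(in, iv, keys, out, num_bytes).
+ */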
+
+#define tmp		%r10
+#define	DDQ(i)		CONCAT(ddq_add_,i)
+#define	XMM(i)		CONCAT(%xmm, i)
+#define	DDQ_DATA	0
+#define	XDATA		1
+#define KEY_128		1
+#define KEY_192		2
+#define KEY_256		3
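+
+/*
+ * KEY_128/KEY_192/KEY_256 are selector tokens for the macros below,
+ * not round counts; the corresponding AES round counts are 10, 12 and
+ * 14, which is why the round sequences differ per key length.
+ */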
+
+.section .rodata
+.align 16
+
+byteswap_const:
+	.octa 0x000102030405060708090A0B0C0D0E0F
+ddq_add_1:
+	.octa 0x00000000000000000000000000000001
+ddq_add_2:
+	.octa 0x00000000000000000000000000000002
+ddq_add_3:
+	.octa 0x00000000000000000000000000000003
+ddq_add_4:
+	.octa 0x00000000000000000000000000000004
+ddq_add_5:
+	.octa 0x00000000000000000000000000000005
+ddq_add_6:
+	.octa 0x00000000000000000000000000000006
+ddq_add_7:
+	.octa 0x00000000000000000000000000000007
+ddq_add_8:
+	.octa 0x00000000000000000000000000000008
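+
+/*
+ * byteswap_const is the vpshufb mask that converts between the
+ * little-endian form used for the counter arithmetic and the
+ * big-endian block that CTR mode actually encrypts. ddq_add_1..8 are
+ * the per-block increments added to the counter with vpaddd.
+ */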
+
+.text
+
+/* generate a unique variable for ddq_add_x */
+
+.macro setddq n
+	var_ddq_add = DDQ(\n)
+.endm
+
+/* generate a unique variable for xmm register */
+.macro setxdata n
+	var_xdata = XMM(\n)
+.endm
+
+/* club the numeric 'id' to the symbol 'name' */
+
+.macro club name, id
+.altmacro
+	.if \name == DDQ_DATA
+		setddq %\id
+	.elseif \name == XDATA
+		setxdata %\id
+	.endif
+.noaltmacro
+.endm
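+
+/*
+ * Example: "club XDATA, 3" evaluates, via .altmacro '%' expansion, to
+ * "setxdata 3" and thus sets var_xdata = %xmm3; "club DDQ_DATA, 3"
+ * likewise sets var_ddq_add = ddq_add_3.
+ */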
+
+/*
+ * do_aes num_in_par load_keys key_len
+ * This increments p_in, but not p_out
+ */
+.macro do_aes b, k, key_len
+	.set by, \b
+	.set load_keys, \k
+	.set klen, \key_len
+
+	.if (load_keys)
+		vmovdqa	0*16(p_keys), xkey0
+	.endif
+
+	vpshufb	xbyteswap, xcounter, xdata0
+
+	.set i, 1
+	.rept (by - 1)
+		club DDQ_DATA, i
+		club XDATA, i
+		vpaddd	var_ddq_add(%rip), xcounter, var_xdata
+		vpshufb	xbyteswap, var_xdata, var_xdata
+		.set i, (i +1)
+	.endr
+
+	vmovdqa	1*16(p_keys), xkeyA
+
+	vpxor	xkey0, xdata0, xdata0
+	club DDQ_DATA, by
+	vpaddd	var_ddq_add(%rip), xcounter, xcounter
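+
+	/*
+	 * xcounter advances by 'by' blocks per invocation. vpaddd adds
+	 * 32-bit lanes independently, so only the low dword of the
+	 * counter is incremented here; a carry out of the low 32 bits
+	 * is not propagated to the upper dwords.
+	 */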
+
+	.set i, 1
+	.rept (by - 1)
+		club XDATA, i
+		vpxor	xkey0, var_xdata, var_xdata
+		.set i, (i +1)
+	.endr
+
+	vmovdqa	2*16(p_keys), xkeyB
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyA, var_xdata, var_xdata		/* key 1 */
+		.set i, (i +1)
+	.endr
+
+	.if (klen == KEY_128)
+		.if (load_keys)
+			vmovdqa	3*16(p_keys), xkeyA
+		.endif
+	.else
+		vmovdqa	3*16(p_keys), xkeyA
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyB, var_xdata, var_xdata		/* key 2 */
+		.set i, (i +1)
+	.endr
+
+	add	$(16*by), p_in
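+
+	/*
+	 * p_in now points one iteration ahead; the input blocks are
+	 * loaded below at negative offsets from it. p_out is advanced
+	 * by the caller instead.
+	 */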
+
+	.if (klen == KEY_128)
+		vmovdqa	4*16(p_keys), xkey4
+	.else
+		.if (load_keys)
+			vmovdqa	4*16(p_keys), xkey4
+		.endif
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyA, var_xdata, var_xdata		/* key 3 */
+		.set i, (i +1)
+	.endr
+
+	vmovdqa	5*16(p_keys), xkeyA
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkey4, var_xdata, var_xdata		/* key 4 */
+		.set i, (i +1)
+	.endr
+
+	.if (klen == KEY_128)
+		.if (load_keys)
+			vmovdqa	6*16(p_keys), xkeyB
+		.endif
+	.else
+		vmovdqa	6*16(p_keys), xkeyB
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyA, var_xdata, var_xdata		/* key 5 */
+		.set i, (i +1)
+	.endr
+
+	vmovdqa	7*16(p_keys), xkeyA
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyB, var_xdata, var_xdata		/* key 6 */
+		.set i, (i +1)
+	.endr
+
+	.if (klen == KEY_128)
+		vmovdqa	8*16(p_keys), xkey8
+	.else
+		.if (load_keys)
+			vmovdqa	8*16(p_keys), xkey8
+		.endif
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyA, var_xdata, var_xdata		/* key 7 */
+		.set i, (i +1)
+	.endr
+
+	.if (klen == KEY_128)
+		.if (load_keys)
+			vmovdqa	9*16(p_keys), xkeyA
+		.endif
+	.else
+		vmovdqa	9*16(p_keys), xkeyA
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkey8, var_xdata, var_xdata		/* key 8 */
+		.set i, (i +1)
+	.endr
+
+	vmovdqa	10*16(p_keys), xkeyB
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		vaesenc	xkeyA, var_xdata, var_xdata		/* key 9 */
+		.set i, (i +1)
+	.endr
+
+	.if (klen != KEY_128)
+		vmovdqa	11*16(p_keys), xkeyA
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		/* key 10 */
+		.if (klen == KEY_128)
+			vaesenclast	xkeyB, var_xdata, var_xdata
+		.else
+			vaesenc	xkeyB, var_xdata, var_xdata
+		.endif
+		.set i, (i +1)
+	.endr
+
+	.if (klen != KEY_128)
+		.if (load_keys)
+			vmovdqa	12*16(p_keys), xkey12
+		.endif
+
+		.set i, 0
+		.rept by
+			club XDATA, i
+			vaesenc	xkeyA, var_xdata, var_xdata	/* key 11 */
+			.set i, (i +1)
+		.endr
+
+		.if (klen == KEY_256)
+			vmovdqa	13*16(p_keys), xkeyA
+		.endif
+
+		.set i, 0
+		.rept by
+			club XDATA, i
+			.if (klen == KEY_256)
+				/* key 12 */
+				vaesenc	xkey12, var_xdata, var_xdata
+			.else
+				vaesenclast	xkey12, var_xdata, var_xdata
+			.endif
+			.set i, (i +1)
+		.endr
+
+		.if (klen == KEY_256)
+			vmovdqa	14*16(p_keys), xkeyB
+
+			.set i, 0
+			.rept by
+				club XDATA, i
+				/* key 13 */
+				vaesenc	xkeyA, var_xdata, var_xdata
+				.set i, (i +1)
+			.endr
+
+			.set i, 0
+			.rept by
+				club XDATA, i
+				/* key 14 */
+				vaesenclast	xkeyB, var_xdata, var_xdata
+				.set i, (i +1)
+			.endr
+		.endif
+	.endif
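+
+	/*
+	 * All rounds are done: vaesenclast ran with round key 10
+	 * (KEY_128), 12 (KEY_192) or 14 (KEY_256), so xdata0..xdata(by-1)
+	 * now hold the CTR keystream blocks.
+	 */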
+
+	.set i, 0
+	.rept (by / 2)
+		.set j, (i+1)
+		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
+		VMOVDQ	(j*16 - 16*by)(p_in), xkeyB
+		club XDATA, i
+		vpxor	xkeyA, var_xdata, var_xdata
+		club XDATA, j
+		vpxor	xkeyB, var_xdata, var_xdata
+		.set i, (i+2)
+	.endr
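+
+	/*
+	 * xkeyA/xkeyB are free after the last round, so the pair loop
+	 * above (and the odd-block case below) reuses them as scratch
+	 * registers for the input blocks being XORed into the keystream.
+	 */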
+
+	.if (i < by)
+		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
+		club XDATA, i
+		vpxor	xkeyA, var_xdata, var_xdata
+	.endif
+
+	.set i, 0
+	.rept by
+		club XDATA, i
+		VMOVDQ	var_xdata, i*16(p_out)
+		.set i, (i+1)
+	.endr
+.endm
+
+.macro do_aes_load val, key_len
+	do_aes \val, 1, \key_len
+.endm
+
+.macro do_aes_noload val, key_len
+	do_aes \val, 0, \key_len
+.endm
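+
+/*
+ * do_aes_load is used once for the remainder blocks and loads the
+ * cached round keys on the way; do_aes_noload runs in the 8-block
+ * main loop and reuses the keys already resident in
+ * xkey0/xkey4/xkey8/xkey12.
+ */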
+
+/* main body of aes ctr load */
+
+.macro do_aes_ctrmain key_len
+
+	cmp	$16, num_bytes
+	jb	.Ldo_return2\key_len
+
+	vmovdqa	byteswap_const(%rip), xbyteswap
+	vmovdqu	(p_iv), xcounter
+	vpshufb	xbyteswap, xcounter, xcounter
+
+	mov	num_bytes, tmp
+	and	$(7*16), tmp
+	jz	.Lmult_of_8_blks\key_len
+
+	/* 1 to 7 remainder blocks: tmp holds 16..112 bytes */
+	cmp	$(4*16), tmp
+	jg	.Lgt4\key_len
+	je	.Leq4\key_len
+
+.Llt4\key_len:
+	cmp	$(2*16), tmp
+	jg	.Leq3\key_len
+	je	.Leq2\key_len
+
+.Leq1\key_len:
+	do_aes_load 1, \key_len
+	add	$(1*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
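+
+	/*
+	 * Each .LeqN stub follows this same pattern: encrypt the N
+	 * remainder blocks, advance p_out, mask num_bytes down with
+	 * "and $(~7*16)" (a multiple of 8 blocks, given block-aligned
+	 * input), then either return or fall into the 8-block main loop.
+	 */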
+
+.Leq2\key_len:
+	do_aes_load 2, \key_len
+	add	$(2*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
+
+.Leq3\key_len:
+	do_aes_load 3, \key_len
+	add	$(3*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
+
+.Leq4\key_len:
+	do_aes_load 4, \key_len
+	add	$(4*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
+
+.Lgt4\key_len:
+	cmp	$(6*16), tmp
+	jg	.Leq7\key_len
+	je	.Leq6\key_len
+
+.Leq5\key_len:
+	do_aes_load 5, \key_len
+	add	$(5*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
+
+.Leq6\key_len:
+	do_aes_load 6, \key_len
+	add	$(6*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
+
+.Leq7\key_len:
+	do_aes_load 7, \key_len
+	add	$(7*16), p_out
+	and	$(~7*16), num_bytes
+	jz	.Ldo_return2\key_len
+	jmp	.Lmain_loop2\key_len
+
+.Lmult_of_8_blks\key_len:
+	.if (\key_len != KEY_128)
+		vmovdqa	0*16(p_keys), xkey0
+		vmovdqa	4*16(p_keys), xkey4
+		vmovdqa	8*16(p_keys), xkey8
+		vmovdqa	12*16(p_keys), xkey12
+	.else
+		vmovdqa	0*16(p_keys), xkey0
+		vmovdqa	3*16(p_keys), xkey4
+		vmovdqa	6*16(p_keys), xkey8
+		vmovdqa	9*16(p_keys), xkey12
+	.endif
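+
+	/*
+	 * Pure multiple-of-8 case: no remainder pass has loaded the
+	 * round keys yet, so cache them now. AES-128 caches rounds
+	 * 0/3/6/9, AES-192/256 cache rounds 0/4/8/12, matching the
+	 * overlapping xkey register aliases defined above.
+	 */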
+.align 16
+.Lmain_loop2\key_len:
+	/* num_bytes is a multiple of 8 blocks (8*16 bytes) and > 0 */
+	do_aes_noload	8, \key_len
+	add	$(8*16), p_out
+	sub	$(8*16), num_bytes
+	jne	.Lmain_loop2\key_len
+
+.Ldo_return2\key_len:
+	/* return updated IV */
+	vpshufb	xbyteswap, xcounter, xcounter
+	vmovdqu	xcounter, (p_iv)
+	ret
+.endm
+
+/*
+ * routine to do AES128 CTR enc/decrypt "by8"
+ * XMM registers are clobbered. Saving/restoring must be done at a
+ * higher level.
+ * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
+ *			unsigned int num_bytes)
+ */
+ENTRY(aes_ctr_enc_128_avx_by8)
+	/* call the aes main loop */
+	do_aes_ctrmain KEY_128
+
+ENDPROC(aes_ctr_enc_128_avx_by8)
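+
+/*
+ * Illustrative caller sketch (an assumption about the glue code, not
+ * part of this file): callers are expected to own the FPU context and
+ * pass the expanded encryption key schedule, roughly:
+ *
+ *	kernel_fpu_begin();
+ *	aes_ctr_enc_128_avx_by8(src, iv, ctx->key_enc, dst, nbytes);
+ *	kernel_fpu_end();
+ *
+ * with nbytes a whole number of 16-byte blocks.
+ */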
+
+/*
+ * routine to do AES192 CTR enc/decrypt "by8"
+ * XMM registers are clobbered. Saving/restoring must be done at a
+ * higher level.
+ * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
+ *			unsigned int num_bytes)
+ */
+ENTRY(aes_ctr_enc_192_avx_by8)
+	/* call the aes main loop */
+	do_aes_ctrmain KEY_192
+
+ENDPROC(aes_ctr_enc_192_avx_by8)
+
+/*
+ * routine to do AES256 CTR enc/decrypt "by8"
+ * XMM registers are clobbered. Saving/restoring must be done at a
+ * higher level.
+ * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
+ *			unsigned int num_bytes)
+ */
+ENTRY(aes_ctr_enc_256_avx_by8)
+	/* call the aes main loop */
+	do_aes_ctrmain KEY_256
+
+ENDPROC(aes_ctr_enc_256_avx_by8)