8 vuotta sitten · 1abee99eaf
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -82,4 +82,11 @@ config CRYPTO_CHACHA20_NEON
 
				 	select CRYPTO_BLKCIPHER
			
 
				 	select CRYPTO_CHACHA20
			
 
				 
			
 
				+config CRYPTO_AES_ARM64_BS
			
 
				+	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
			
 
				+	depends on KERNEL_MODE_NEON
			
 
				+	select CRYPTO_BLKCIPHER
			
 
				+	select CRYPTO_AES_ARM64
			
 
				+	select CRYPTO_SIMD
			
 
				+
			
 
				 endif
			
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -47,6 +47,9 @@ chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
 
				 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
			
 
				 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
			
 
				 
			
 
				+obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
			
 
				+aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
			
 
				+
			
 
				 AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
			
 
				 AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
			
 
				 
			
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -0,0 +1,963 @@
 
				+/*
			
 
				+ * Bit sliced AES using NEON instructions
			
 
				+ *
			
 
				+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ */
			
 
				+
			
 
				+/*
			
 
				+ * The algorithm implemented here is described in detail by the paper
			
 
				+ * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
			
 
				+ * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
			
 
				+ *
			
 
				+ * This implementation is based primarily on the OpenSSL implementation
			
 
				+ * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
			
 
				+ */
			
 
				+
			
 
				+#include <linux/linkage.h>
			
 
				+#include <asm/assembler.h>
			
 
				+
			
 
				+	.text
			
 
				+
			
 
				+	rounds		.req	x11
			
 
				+	bskey		.req	x12
			
 
				+
			
 
				+	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
			
 
				+	eor		\b2, \b2, \b1
			
 
				+	eor		\b5, \b5, \b6
			
 
				+	eor		\b3, \b3, \b0
			
 
				+	eor		\b6, \b6, \b2
			
 
				+	eor		\b5, \b5, \b0
			
 
				+	eor		\b6, \b6, \b3
			
 
				+	eor		\b3, \b3, \b7
			
 
				+	eor		\b7, \b7, \b5
			
 
				+	eor		\b3, \b3, \b4
			
 
				+	eor		\b4, \b4, \b5
			
 
				+	eor		\b2, \b2, \b7
			
 
				+	eor		\b3, \b3, \b1
			
 
				+	eor		\b1, \b1, \b5
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
			
 
				+	eor		\b0, \b0, \b6
			
 
				+	eor		\b1, \b1, \b4
			
 
				+	eor		\b4, \b4, \b6
			
 
				+	eor		\b2, \b2, \b0
			
 
				+	eor		\b6, \b6, \b1
			
 
				+	eor		\b1, \b1, \b5
			
 
				+	eor		\b5, \b5, \b3
			
 
				+	eor		\b3, \b3, \b7
			
 
				+	eor		\b7, \b7, \b5
			
 
				+	eor		\b2, \b2, \b5
			
 
				+	eor		\b4, \b4, \b7
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
			
 
				+	eor		\b1, \b1, \b7
			
 
				+	eor		\b4, \b4, \b7
			
 
				+	eor		\b7, \b7, \b5
			
 
				+	eor		\b1, \b1, \b3
			
 
				+	eor		\b2, \b2, \b5
			
 
				+	eor		\b3, \b3, \b7
			
 
				+	eor		\b6, \b6, \b1
			
 
				+	eor		\b2, \b2, \b0
			
 
				+	eor		\b5, \b5, \b3
			
 
				+	eor		\b4, \b4, \b6
			
 
				+	eor		\b0, \b0, \b6
			
 
				+	eor		\b1, \b1, \b4
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
			
 
				+	eor		\b1, \b1, \b5
			
 
				+	eor		\b2, \b2, \b7
			
 
				+	eor		\b3, \b3, \b1
			
 
				+	eor		\b4, \b4, \b5
			
 
				+	eor		\b7, \b7, \b5
			
 
				+	eor		\b3, \b3, \b4
			
 
				+	eor 		\b5, \b5, \b0
			
 
				+	eor		\b3, \b3, \b7
			
 
				+	eor		\b6, \b6, \b2
			
 
				+	eor		\b2, \b2, \b1
			
 
				+	eor		\b6, \b6, \b3
			
 
				+	eor		\b3, \b3, \b0
			
 
				+	eor		\b5, \b5, \b6
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
			
 
				+	eor 		\t0, \y0, \y1
			
 
				+	and		\t0, \t0, \x0
			
 
				+	eor		\x0, \x0, \x1
			
 
				+	and		\t1, \x1, \y0
			
 
				+	and		\x0, \x0, \y1
			
 
				+	eor		\x1, \t1, \t0
			
 
				+	eor		\x0, \x0, \t1
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
			
 
				+	eor		\t0, \y0, \y1
			
 
				+	eor 		\t1, \y2, \y3
			
 
				+	and		\t0, \t0, \x0
			
 
				+	and		\t1, \t1, \x2
			
 
				+	eor		\x0, \x0, \x1
			
 
				+	eor		\x2, \x2, \x3
			
 
				+	and		\x1, \x1, \y0
			
 
				+	and		\x3, \x3, \y2
			
 
				+	and		\x0, \x0, \y1
			
 
				+	and		\x2, \x2, \y3
			
 
				+	eor		\x1, \x1, \x0
			
 
				+	eor		\x2, \x2, \x3
			
 
				+	eor		\x0, \x0, \t0
			
 
				+	eor		\x3, \x3, \t1
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
			
 
				+				    y0, y1, y2, y3, t0, t1, t2, t3
			
 
				+	eor		\t0, \x0, \x2
			
 
				+	eor		\t1, \x1, \x3
			
 
				+	mul_gf4  	\x0, \x1, \y0, \y1, \t2, \t3
			
 
				+	eor		\y0, \y0, \y2
			
 
				+	eor		\y1, \y1, \y3
			
 
				+	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
			
 
				+	eor		\x0, \x0, \t0
			
 
				+	eor		\x2, \x2, \t0
			
 
				+	eor		\x1, \x1, \t1
			
 
				+	eor		\x3, \x3, \t1
			
 
				+	eor		\t0, \x4, \x6
			
 
				+	eor		\t1, \x5, \x7
			
 
				+	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
			
 
				+	eor		\y0, \y0, \y2
			
 
				+	eor		\y1, \y1, \y3
			
 
				+	mul_gf4  	\x4, \x5, \y0, \y1, \t2, \t3
			
 
				+	eor		\x4, \x4, \t0
			
 
				+	eor		\x6, \x6, \t0
			
 
				+	eor		\x5, \x5, \t1
			
 
				+	eor		\x7, \x7, \t1
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
			
 
				+				   t0, t1, t2, t3, s0, s1, s2, s3
			
 
				+	eor		\t3, \x4, \x6
			
 
				+	eor		\t0, \x5, \x7
			
 
				+	eor		\t1, \x1, \x3
			
 
				+	eor		\s1, \x7, \x6
			
 
				+	eor		\s0, \x0, \x2
			
 
				+	eor		\s3, \t3, \t0
			
 
				+	orr		\t2, \t0, \t1
			
 
				+	and		\s2, \t3, \s0
			
 
				+	orr		\t3, \t3, \s0
			
 
				+	eor		\s0, \s0, \t1
			
 
				+	and		\t0, \t0, \t1
			
 
				+	eor		\t1, \x3, \x2
			
 
				+	and		\s3, \s3, \s0
			
 
				+	and		\s1, \s1, \t1
			
 
				+	eor		\t1, \x4, \x5
			
 
				+	eor		\s0, \x1, \x0
			
 
				+	eor		\t3, \t3, \s1
			
 
				+	eor		\t2, \t2, \s1
			
 
				+	and		\s1, \t1, \s0
			
 
				+	orr		\t1, \t1, \s0
			
 
				+	eor		\t3, \t3, \s3
			
 
				+	eor		\t0, \t0, \s1
			
 
				+	eor		\t2, \t2, \s2
			
 
				+	eor		\t1, \t1, \s3
			
 
				+	eor		\t0, \t0, \s2
			
 
				+	and		\s0, \x7, \x3
			
 
				+	eor		\t1, \t1, \s2
			
 
				+	and		\s1, \x6, \x2
			
 
				+	and		\s2, \x5, \x1
			
 
				+	orr		\s3, \x4, \x0
			
 
				+	eor		\t3, \t3, \s0
			
 
				+	eor		\t1, \t1, \s2
			
 
				+	eor		\s0, \t0, \s3
			
 
				+	eor		\t2, \t2, \s1
			
 
				+	and		\s2, \t3, \t1
			
 
				+	eor		\s1, \t2, \s2
			
 
				+	eor		\s3, \s0, \s2
			
 
				+	bsl		\s1, \t1, \s0
			
 
				+	not		\t0, \s0
			
 
				+	bsl		\s0, \s1, \s3
			
 
				+	bsl		\t0, \s1, \s3
			
 
				+	bsl		\s3, \t3, \t2
			
 
				+	eor		\t3, \t3, \t2
			
 
				+	and		\s2, \s0, \s3
			
 
				+	eor		\t1, \t1, \t0
			
 
				+	eor		\s2, \s2, \t3
			
 
				+	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			
 
				+			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			
 
				+			      t0, t1, t2, t3, s0, s1, s2, s3
			
 
				+	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			
 
				+			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
			
 
				+	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			
 
				+			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			
 
				+			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			
 
				+			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
			
 
				+	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			
 
				+			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			
 
				+				  t0, t1, t2, t3, s0, s1, s2, s3
			
 
				+	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			
 
				+			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
			
 
				+	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			
 
				+			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			
 
				+			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			
 
				+			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
			
 
				+	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			
 
				+			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		enc_next_rk
			
 
				+	ldp		q16, q17, [bskey], #128
			
 
				+	ldp		q18, q19, [bskey, #-96]
			
 
				+	ldp		q20, q21, [bskey, #-64]
			
 
				+	ldp		q22, q23, [bskey, #-32]
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		dec_next_rk
			
 
				+	ldp		q16, q17, [bskey, #-128]!
			
 
				+	ldp		q18, q19, [bskey, #32]
			
 
				+	ldp		q20, q21, [bskey, #64]
			
 
				+	ldp		q22, q23, [bskey, #96]
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
			
 
				+	eor		\x0\().16b, \x0\().16b, v16.16b
			
 
				+	eor		\x1\().16b, \x1\().16b, v17.16b
			
 
				+	eor		\x2\().16b, \x2\().16b, v18.16b
			
 
				+	eor		\x3\().16b, \x3\().16b, v19.16b
			
 
				+	eor		\x4\().16b, \x4\().16b, v20.16b
			
 
				+	eor		\x5\().16b, \x5\().16b, v21.16b
			
 
				+	eor		\x6\().16b, \x6\().16b, v22.16b
			
 
				+	eor		\x7\().16b, \x7\().16b, v23.16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
			
 
				+	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b
			
 
				+	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
			
 
				+	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
			
 
				+	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
			
 
				+	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
			
 
				+	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
			
 
				+	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
			
 
				+	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
			
 
				+				  t0, t1, t2, t3, t4, t5, t6, t7, inv
			
 
				+	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
			
 
				+	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
			
 
				+	eor		\x0\().16b, \x0\().16b, \t0\().16b
			
 
				+	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
			
 
				+	eor		\x1\().16b, \x1\().16b, \t1\().16b
			
 
				+	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
			
 
				+	eor		\x2\().16b, \x2\().16b, \t2\().16b
			
 
				+	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
			
 
				+	eor		\x3\().16b, \x3\().16b, \t3\().16b
			
 
				+	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
			
 
				+	eor		\x4\().16b, \x4\().16b, \t4\().16b
			
 
				+	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
			
 
				+	eor		\x5\().16b, \x5\().16b, \t5\().16b
			
 
				+	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
			
 
				+	eor		\x6\().16b, \x6\().16b, \t6\().16b
			
 
				+	eor		\t1\().16b, \t1\().16b, \x0\().16b
			
 
				+	eor		\x7\().16b, \x7\().16b, \t7\().16b
			
 
				+	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
			
 
				+	eor		\t2\().16b, \t2\().16b, \x1\().16b
			
 
				+	eor		\t0\().16b, \t0\().16b, \x7\().16b
			
 
				+	eor		\t1\().16b, \t1\().16b, \x7\().16b
			
 
				+	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
			
 
				+	eor		\t5\().16b, \t5\().16b, \x4\().16b
			
 
				+	eor		\x0\().16b, \x0\().16b, \t0\().16b
			
 
				+	eor		\t6\().16b, \t6\().16b, \x5\().16b
			
 
				+	eor		\x1\().16b, \x1\().16b, \t1\().16b
			
 
				+	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
			
 
				+	eor		\t4\().16b, \t4\().16b, \x3\().16b
			
 
				+	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
			
 
				+	eor		\t7\().16b, \t7\().16b, \x6\().16b
			
 
				+	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
			
 
				+	eor		\t3\().16b, \t3\().16b, \x2\().16b
			
 
				+	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
			
 
				+	eor		\t4\().16b, \t4\().16b, \x7\().16b
			
 
				+	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
			
 
				+	eor		\t3\().16b, \t3\().16b, \x7\().16b
			
 
				+	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
			
 
				+	eor		\x7\().16b, \t1\().16b, \t5\().16b
			
 
				+	.ifb		\inv
			
 
				+	eor		\x2\().16b, \t0\().16b, \t4\().16b
			
 
				+	eor		\x4\().16b, \x4\().16b, \t3\().16b
			
 
				+	eor		\x5\().16b, \x5\().16b, \t7\().16b
			
 
				+	eor		\x3\().16b, \x3\().16b, \t6\().16b
			
 
				+	eor		\x6\().16b, \x6\().16b, \t2\().16b
			
 
				+	.else
			
 
				+	eor		\t3\().16b, \t3\().16b, \x4\().16b
			
 
				+	eor		\x5\().16b, \x5\().16b, \t7\().16b
			
 
				+	eor		\x2\().16b, \x3\().16b, \t6\().16b
			
 
				+	eor		\x3\().16b, \t0\().16b, \t4\().16b
			
 
				+	eor		\x4\().16b, \x6\().16b, \t2\().16b
			
 
				+	mov		\x6\().16b, \t3\().16b
			
 
				+	.endif
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
			
 
				+				      t0, t1, t2, t3, t4, t5, t6, t7
			
 
				+	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
			
 
				+	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
			
 
				+	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
			
 
				+	eor		\t0\().16b, \t0\().16b, \x0\().16b
			
 
				+	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
			
 
				+	eor		\t6\().16b, \t6\().16b, \x6\().16b
			
 
				+	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
			
 
				+	eor		\t7\().16b, \t7\().16b, \x7\().16b
			
 
				+	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
			
 
				+	eor		\t1\().16b, \t1\().16b, \x1\().16b
			
 
				+	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
			
 
				+	eor		\t2\().16b, \t2\().16b, \x2\().16b
			
 
				+	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
			
 
				+	eor		\t3\().16b, \t3\().16b, \x3\().16b
			
 
				+	eor		\t4\().16b, \t4\().16b, \x4\().16b
			
 
				+	eor		\t5\().16b, \t5\().16b, \x5\().16b
			
 
				+	eor		\x0\().16b, \x0\().16b, \t6\().16b
			
 
				+	eor		\x1\().16b, \x1\().16b, \t6\().16b
			
 
				+	eor		\x2\().16b, \x2\().16b, \t0\().16b
			
 
				+	eor		\x4\().16b, \x4\().16b, \t2\().16b
			
 
				+	eor		\x3\().16b, \x3\().16b, \t1\().16b
			
 
				+	eor		\x1\().16b, \x1\().16b, \t7\().16b
			
 
				+	eor		\x2\().16b, \x2\().16b, \t7\().16b
			
 
				+	eor		\x4\().16b, \x4\().16b, \t6\().16b
			
 
				+	eor		\x5\().16b, \x5\().16b, \t3\().16b
			
 
				+	eor		\x3\().16b, \x3\().16b, \t6\().16b
			
 
				+	eor		\x6\().16b, \x6\().16b, \t4\().16b
			
 
				+	eor		\x4\().16b, \x4\().16b, \t7\().16b
			
 
				+	eor		\x5\().16b, \x5\().16b, \t7\().16b
			
 
				+	eor		\x7\().16b, \x7\().16b, \t5\().16b
			
 
				+	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			
 
				+			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
			
 
				+	ushr		\t0\().2d, \b0\().2d, #\n
			
 
				+	ushr		\t1\().2d, \b1\().2d, #\n
			
 
				+	eor		\t0\().16b, \t0\().16b, \a0\().16b
			
 
				+	eor		\t1\().16b, \t1\().16b, \a1\().16b
			
 
				+	and		\t0\().16b, \t0\().16b, \mask\().16b
			
 
				+	and		\t1\().16b, \t1\().16b, \mask\().16b
			
 
				+	eor		\a0\().16b, \a0\().16b, \t0\().16b
			
 
				+	shl		\t0\().2d, \t0\().2d, #\n
			
 
				+	eor		\a1\().16b, \a1\().16b, \t1\().16b
			
 
				+	shl		\t1\().2d, \t1\().2d, #\n
			
 
				+	eor		\b0\().16b, \b0\().16b, \t0\().16b
			
 
				+	eor		\b1\().16b, \b1\().16b, \t1\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
			
 
				+	movi		\t0\().16b, #0x55
			
 
				+	movi		\t1\().16b, #0x33
			
 
				+	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
			
 
				+	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
			
 
				+	movi		\t0\().16b, #0x0f
			
 
				+	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
			
 
				+	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
			
 
				+	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
			
 
				+	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
			
 
				+	.endm
			
 
				+
			
 
				+
			
 
				+	.align		6
			
 
				+M0:	.octa		0x0004080c0105090d02060a0e03070b0f
			
 
				+
			
 
				+M0SR:	.octa		0x0004080c05090d010a0e02060f03070b
			
 
				+SR:	.octa		0x0f0e0d0c0a09080b0504070600030201
			
 
				+SRM0:	.octa		0x01060b0c0207080d0304090e00050a0f
			
 
				+
			
 
				+M0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
			
 
				+ISR:	.octa		0x0f0e0d0c080b0a090504070602010003
			
 
				+ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
			
 
				+
			
 
				+	/*
			
 
				+	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
			
 
				+	 */
			
 
				+ENTRY(aesbs_convert_key)
			
 
				+	ld1		{v7.4s}, [x1], #16		// load round 0 key
			
 
				+	ld1		{v17.4s}, [x1], #16		// load round 1 key
			
 
				+
			
 
				+	movi		v8.16b,  #0x01			// bit masks
			
 
				+	movi		v9.16b,  #0x02
			
 
				+	movi		v10.16b, #0x04
			
 
				+	movi		v11.16b, #0x08
			
 
				+	movi		v12.16b, #0x10
			
 
				+	movi		v13.16b, #0x20
			
 
				+	movi		v14.16b, #0x40
			
 
				+	movi		v15.16b, #0x80
			
 
				+	ldr		q16, M0
			
 
				+
			
 
				+	sub		x2, x2, #1
			
 
				+	str		q7, [x0], #16		// save round 0 key
			
 
				+
			
 
				+.Lkey_loop:
			
 
				+	tbl		v7.16b ,{v17.16b}, v16.16b
			
 
				+	ld1		{v17.4s}, [x1], #16		// load next round key
			
 
				+
			
 
				+	cmtst		v0.16b, v7.16b, v8.16b
			
 
				+	cmtst		v1.16b, v7.16b, v9.16b
			
 
				+	cmtst		v2.16b, v7.16b, v10.16b
			
 
				+	cmtst		v3.16b, v7.16b, v11.16b
			
 
				+	cmtst		v4.16b, v7.16b, v12.16b
			
 
				+	cmtst		v5.16b, v7.16b, v13.16b
			
 
				+	cmtst		v6.16b, v7.16b, v14.16b
			
 
				+	cmtst		v7.16b, v7.16b, v15.16b
			
 
				+	not		v0.16b, v0.16b
			
 
				+	not		v1.16b, v1.16b
			
 
				+	not		v5.16b, v5.16b
			
 
				+	not		v6.16b, v6.16b
			
 
				+
			
 
				+	subs		x2, x2, #1
			
 
				+	stp		q0, q1, [x0], #128
			
 
				+	stp		q2, q3, [x0, #-96]
			
 
				+	stp		q4, q5, [x0, #-64]
			
 
				+	stp		q6, q7, [x0, #-32]
			
 
				+	b.ne		.Lkey_loop
			
 
				+
			
 
				+	movi		v7.16b, #0x63			// compose .L63
			
 
				+	eor		v17.16b, v17.16b, v7.16b
			
 
				+	str		q17, [x0]
			
 
				+	ret
			
 
				+ENDPROC(aesbs_convert_key)
			
 
				+
			
 
				+	.align		4
			
 
				+aesbs_encrypt8:
			
 
				+	ldr		q9, [bskey], #16		// round 0 key
			
 
				+	ldr		q8, M0SR
			
 
				+	ldr		q24, SR
			
 
				+
			
 
				+	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
			
 
				+	eor		v11.16b, v1.16b, v9.16b
			
 
				+	tbl		v0.16b, {v10.16b}, v8.16b
			
 
				+	eor		v12.16b, v2.16b, v9.16b
			
 
				+	tbl		v1.16b, {v11.16b}, v8.16b
			
 
				+	eor		v13.16b, v3.16b, v9.16b
			
 
				+	tbl		v2.16b, {v12.16b}, v8.16b
			
 
				+	eor		v14.16b, v4.16b, v9.16b
			
 
				+	tbl		v3.16b, {v13.16b}, v8.16b
			
 
				+	eor		v15.16b, v5.16b, v9.16b
			
 
				+	tbl		v4.16b, {v14.16b}, v8.16b
			
 
				+	eor		v10.16b, v6.16b, v9.16b
			
 
				+	tbl		v5.16b, {v15.16b}, v8.16b
			
 
				+	eor		v11.16b, v7.16b, v9.16b
			
 
				+	tbl		v6.16b, {v10.16b}, v8.16b
			
 
				+	tbl		v7.16b, {v11.16b}, v8.16b
			
 
				+
			
 
				+	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
			
 
				+
			
 
				+	sub		rounds, rounds, #1
			
 
				+	b		.Lenc_sbox
			
 
				+
			
 
				+.Lenc_loop:
			
 
				+	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
			
 
				+.Lenc_sbox:
			
 
				+	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
			
 
				+								v13, v14, v15
			
 
				+	subs		rounds, rounds, #1
			
 
				+	b.cc		.Lenc_done
			
 
				+
			
 
				+	enc_next_rk
			
 
				+
			
 
				+	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
			
 
				+								v13, v14, v15
			
 
				+
			
 
				+	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7
			
 
				+
			
 
				+	b.ne		.Lenc_loop
			
 
				+	ldr		q24, SRM0
			
 
				+	b		.Lenc_loop
			
 
				+
			
 
				+.Lenc_done:
			
 
				+	ldr		q12, [bskey]			// last round key
			
 
				+
			
 
				+	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
			
 
				+
			
 
				+	eor		v0.16b, v0.16b, v12.16b
			
 
				+	eor		v1.16b, v1.16b, v12.16b
			
 
				+	eor		v4.16b, v4.16b, v12.16b
			
 
				+	eor		v6.16b, v6.16b, v12.16b
			
 
				+	eor		v3.16b, v3.16b, v12.16b
			
 
				+	eor		v7.16b, v7.16b, v12.16b
			
 
				+	eor		v2.16b, v2.16b, v12.16b
			
 
				+	eor		v5.16b, v5.16b, v12.16b
			
 
				+	ret
			
 
				+ENDPROC(aesbs_encrypt8)
			
 
				+
			
 
				+	.align		4
			
 
				+aesbs_decrypt8:
			
 
				+	lsl		x9, rounds, #7
			
 
				+	add		bskey, bskey, x9
			
 
				+
			
 
				+	ldr		q9, [bskey, #-112]!		// round 0 key
			
 
				+	ldr		q8, M0ISR
			
 
				+	ldr		q24, ISR
			
 
				+
			
 
				+	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
			
 
				+	eor		v11.16b, v1.16b, v9.16b
			
 
				+	tbl		v0.16b, {v10.16b}, v8.16b
			
 
				+	eor		v12.16b, v2.16b, v9.16b
			
 
				+	tbl		v1.16b, {v11.16b}, v8.16b
			
 
				+	eor		v13.16b, v3.16b, v9.16b
			
 
				+	tbl		v2.16b, {v12.16b}, v8.16b
			
 
				+	eor		v14.16b, v4.16b, v9.16b
			
 
				+	tbl		v3.16b, {v13.16b}, v8.16b
			
 
				+	eor		v15.16b, v5.16b, v9.16b
			
 
				+	tbl		v4.16b, {v14.16b}, v8.16b
			
 
				+	eor		v10.16b, v6.16b, v9.16b
			
 
				+	tbl		v5.16b, {v15.16b}, v8.16b
			
 
				+	eor		v11.16b, v7.16b, v9.16b
			
 
				+	tbl		v6.16b, {v10.16b}, v8.16b
			
 
				+	tbl		v7.16b, {v11.16b}, v8.16b
			
 
				+
			
 
				+	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
			
 
				+
			
 
				+	sub		rounds, rounds, #1
			
 
				+	b		.Ldec_sbox
			
 
				+
			
 
				+.Ldec_loop:
			
 
				+	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
			
 
				+.Ldec_sbox:
			
 
				+	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
			
 
				+								v13, v14, v15
			
 
				+	subs		rounds, rounds, #1
			
 
				+	b.cc		.Ldec_done
			
 
				+
			
 
				+	dec_next_rk
			
 
				+
			
 
				+	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5
			
 
				+
			
 
				+	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
			
 
				+								v13, v14, v15
			
 
				+
			
 
				+	b.ne		.Ldec_loop
			
 
				+	ldr		q24, ISRM0
			
 
				+	b		.Ldec_loop
			
 
				+.Ldec_done:
			
 
				+	ldr		q12, [bskey, #-16]		// last round key
			
 
				+
			
 
				+	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
			
 
				+
			
 
				+	eor		v0.16b, v0.16b, v12.16b
			
 
				+	eor		v1.16b, v1.16b, v12.16b
			
 
				+	eor		v6.16b, v6.16b, v12.16b
			
 
				+	eor		v4.16b, v4.16b, v12.16b
			
 
				+	eor		v2.16b, v2.16b, v12.16b
			
 
				+	eor		v7.16b, v7.16b, v12.16b
			
 
				+	eor		v3.16b, v3.16b, v12.16b
			
 
				+	eor		v5.16b, v5.16b, v12.16b
			
 
				+	ret
			
 
				+ENDPROC(aesbs_decrypt8)
			
 
				+
			
 
				+	/*
			
 
				+	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
			
 
				+	 *		     int blocks)
			
 
				+	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
			
 
				+	 *		     int blocks)
			
 
				+	 */
			
 
				+	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				+
			
 
				+99:	mov		x5, #1
			
 
				+	lsl		x5, x5, x4
			
 
				+	subs		w4, w4, #8
			
 
				+	csel		x4, x4, xzr, pl
			
 
				+	csel		x5, x5, xzr, mi
			
 
				+
			
 
				+	ld1		{v0.16b}, [x1], #16
			
 
				+	tbnz		x5, #1, 0f
			
 
				+	ld1		{v1.16b}, [x1], #16
			
 
				+	tbnz		x5, #2, 0f
			
 
				+	ld1		{v2.16b}, [x1], #16
			
 
				+	tbnz		x5, #3, 0f
			
 
				+	ld1		{v3.16b}, [x1], #16
			
 
				+	tbnz		x5, #4, 0f
			
 
				+	ld1		{v4.16b}, [x1], #16
			
 
				+	tbnz		x5, #5, 0f
			
 
				+	ld1		{v5.16b}, [x1], #16
			
 
				+	tbnz		x5, #6, 0f
			
 
				+	ld1		{v6.16b}, [x1], #16
			
 
				+	tbnz		x5, #7, 0f
			
 
				+	ld1		{v7.16b}, [x1], #16
			
 
				+
			
 
				+0:	mov		bskey, x2
			
 
				+	mov		rounds, x3
			
 
				+	bl		\do8
			
 
				+
			
 
				+	st1		{\o0\().16b}, [x0], #16
			
 
				+	tbnz		x5, #1, 1f
			
 
				+	st1		{\o1\().16b}, [x0], #16
			
 
				+	tbnz		x5, #2, 1f
			
 
				+	st1		{\o2\().16b}, [x0], #16
			
 
				+	tbnz		x5, #3, 1f
			
 
				+	st1		{\o3\().16b}, [x0], #16
			
 
				+	tbnz		x5, #4, 1f
			
 
				+	st1		{\o4\().16b}, [x0], #16
			
 
				+	tbnz		x5, #5, 1f
			
 
				+	st1		{\o5\().16b}, [x0], #16
			
 
				+	tbnz		x5, #6, 1f
			
 
				+	st1		{\o6\().16b}, [x0], #16
			
 
				+	tbnz		x5, #7, 1f
			
 
				+	st1		{\o7\().16b}, [x0], #16
			
 
				+
			
 
				+	cbnz		x4, 99b
			
 
				+
			
 
				+1:	ldp		x29, x30, [sp], #16
			
 
				+	ret
			
 
				+	.endm
			
 
				+
			
 
				+	.align		4
			
 
				+ENTRY(aesbs_ecb_encrypt)
			
 
				+	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
			
 
				+ENDPROC(aesbs_ecb_encrypt)
			
 
				+
			
 
				+	.align		4
			
 
				+ENTRY(aesbs_ecb_decrypt)
			
 
				+	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
			
 
				+ENDPROC(aesbs_ecb_decrypt)
			
 
				+
			
 
				+	/*
			
 
				+	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
			
 
				+	 *		     int blocks, u8 iv[])
			
 
				+	 */
			
 
				+	.align		4
			
 
				+ENTRY(aesbs_cbc_decrypt)
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				+
			
 
				+99:	mov		x6, #1
			
 
				+	lsl		x6, x6, x4
			
 
				+	subs		w4, w4, #8
			
 
				+	csel		x4, x4, xzr, pl
			
 
				+	csel		x6, x6, xzr, mi
			
 
				+
			
 
				+	ld1		{v0.16b}, [x1], #16
			
 
				+	mov		v25.16b, v0.16b
			
 
				+	tbnz		x6, #1, 0f
			
 
				+	ld1		{v1.16b}, [x1], #16
			
 
				+	mov		v26.16b, v1.16b
			
 
				+	tbnz		x6, #2, 0f
			
 
				+	ld1		{v2.16b}, [x1], #16
			
 
				+	mov		v27.16b, v2.16b
			
 
				+	tbnz		x6, #3, 0f
			
 
				+	ld1		{v3.16b}, [x1], #16
			
 
				+	mov		v28.16b, v3.16b
			
 
				+	tbnz		x6, #4, 0f
			
 
				+	ld1		{v4.16b}, [x1], #16
			
 
				+	mov		v29.16b, v4.16b
			
 
				+	tbnz		x6, #5, 0f
			
 
				+	ld1		{v5.16b}, [x1], #16
			
 
				+	mov		v30.16b, v5.16b
			
 
				+	tbnz		x6, #6, 0f
			
 
				+	ld1		{v6.16b}, [x1], #16
			
 
				+	mov		v31.16b, v6.16b
			
 
				+	tbnz		x6, #7, 0f
			
 
				+	ld1		{v7.16b}, [x1]
			
 
				+
			
 
				+0:	mov		bskey, x2
			
 
				+	mov		rounds, x3
			
 
				+	bl		aesbs_decrypt8
			
 
				+
			
 
				+	ld1		{v24.16b}, [x5]			// load IV
			
 
				+
			
 
				+	eor		v1.16b, v1.16b, v25.16b
			
 
				+	eor		v6.16b, v6.16b, v26.16b
			
 
				+	eor		v4.16b, v4.16b, v27.16b
			
 
				+	eor		v2.16b, v2.16b, v28.16b
			
 
				+	eor		v7.16b, v7.16b, v29.16b
			
 
				+	eor		v0.16b, v0.16b, v24.16b
			
 
				+	eor		v3.16b, v3.16b, v30.16b
			
 
				+	eor		v5.16b, v5.16b, v31.16b
			
 
				+
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	mov		v24.16b, v25.16b
			
 
				+	tbnz		x6, #1, 1f
			
 
				+	st1		{v1.16b}, [x0], #16
			
 
				+	mov		v24.16b, v26.16b
			
 
				+	tbnz		x6, #2, 1f
			
 
				+	st1		{v6.16b}, [x0], #16
			
 
				+	mov		v24.16b, v27.16b
			
 
				+	tbnz		x6, #3, 1f
			
 
				+	st1		{v4.16b}, [x0], #16
			
 
				+	mov		v24.16b, v28.16b
			
 
				+	tbnz		x6, #4, 1f
			
 
				+	st1		{v2.16b}, [x0], #16
			
 
				+	mov		v24.16b, v29.16b
			
 
				+	tbnz		x6, #5, 1f
			
 
				+	st1		{v7.16b}, [x0], #16
			
 
				+	mov		v24.16b, v30.16b
			
 
				+	tbnz		x6, #6, 1f
			
 
				+	st1		{v3.16b}, [x0], #16
			
 
				+	mov		v24.16b, v31.16b
			
 
				+	tbnz		x6, #7, 1f
			
 
				+	ld1		{v24.16b}, [x1], #16
			
 
				+	st1		{v5.16b}, [x0], #16
			
 
				+1:	st1		{v24.16b}, [x5]			// store IV
			
 
				+
			
 
				+	cbnz		x4, 99b
			
 
				+
			
 
				+	ldp		x29, x30, [sp], #16
			
 
				+	ret
			
 
				+ENDPROC(aesbs_cbc_decrypt)
			
 
				+
			
 
				+	.macro		next_tweak, out, in, const, tmp
			
 
				+	sshr		\tmp\().2d,  \in\().2d,   #63
			
 
				+	and		\tmp\().16b, \tmp\().16b, \const\().16b
			
 
				+	add		\out\().2d,  \in\().2d,   \in\().2d
			
 
				+	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
			
 
				+	eor		\out\().16b, \out\().16b, \tmp\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	.align		4
			
 
				+.Lxts_mul_x:
			
 
				+CPU_LE(	.quad		1, 0x87		)
			
 
				+CPU_BE(	.quad		0x87, 1		)
			
 
				+
			
 
				+	/*
			
 
				+	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
			
 
				+	 *		     int blocks, u8 iv[])
			
 
				+	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
			
 
				+	 *		     int blocks, u8 iv[])
			
 
				+	 */
			
 
				+__xts_crypt8:
			
 
				+	mov		x6, #1
			
 
				+	lsl		x6, x6, x4
			
 
				+	subs		w4, w4, #8
			
 
				+	csel		x4, x4, xzr, pl
			
 
				+	csel		x6, x6, xzr, mi
			
 
				+
			
 
				+	ld1		{v0.16b}, [x1], #16
			
 
				+	next_tweak	v26, v25, v30, v31
			
 
				+	eor		v0.16b, v0.16b, v25.16b
			
 
				+	tbnz		x6, #1, 0f
			
 
				+
			
 
				+	ld1		{v1.16b}, [x1], #16
			
 
				+	next_tweak	v27, v26, v30, v31
			
 
				+	eor		v1.16b, v1.16b, v26.16b
			
 
				+	tbnz		x6, #2, 0f
			
 
				+
			
 
				+	ld1		{v2.16b}, [x1], #16
			
 
				+	next_tweak	v28, v27, v30, v31
			
 
				+	eor		v2.16b, v2.16b, v27.16b
			
 
				+	tbnz		x6, #3, 0f
			
 
				+
			
 
				+	ld1		{v3.16b}, [x1], #16
			
 
				+	next_tweak	v29, v28, v30, v31
			
 
				+	eor		v3.16b, v3.16b, v28.16b
			
 
				+	tbnz		x6, #4, 0f
			
 
				+
			
 
				+	ld1		{v4.16b}, [x1], #16
			
 
				+	str		q29, [sp, #16]
			
 
				+	eor		v4.16b, v4.16b, v29.16b
			
 
				+	next_tweak	v29, v29, v30, v31
			
 
				+	tbnz		x6, #5, 0f
			
 
				+
			
 
				+	ld1		{v5.16b}, [x1], #16
			
 
				+	str		q29, [sp, #32]
			
 
				+	eor		v5.16b, v5.16b, v29.16b
			
 
				+	next_tweak	v29, v29, v30, v31
			
 
				+	tbnz		x6, #6, 0f
			
 
				+
			
 
				+	ld1		{v6.16b}, [x1], #16
			
 
				+	str		q29, [sp, #48]
			
 
				+	eor		v6.16b, v6.16b, v29.16b
			
 
				+	next_tweak	v29, v29, v30, v31
			
 
				+	tbnz		x6, #7, 0f
			
 
				+
			
 
				+	ld1		{v7.16b}, [x1], #16
			
 
				+	str		q29, [sp, #64]
			
 
				+	eor		v7.16b, v7.16b, v29.16b
			
 
				+	next_tweak	v29, v29, v30, v31
			
 
				+
			
 
				+0:	mov		bskey, x2
			
 
				+	mov		rounds, x3
			
 
				+	br		x7
			
 
				+ENDPROC(__xts_crypt8)
			
 
				+
			
 
				+	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
			
 
				+	stp		x29, x30, [sp, #-80]!
			
 
				+	mov		x29, sp
			
 
				+
			
 
				+	ldr		q30, .Lxts_mul_x
			
 
				+	ld1		{v25.16b}, [x5]
			
 
				+
			
 
				+99:	adr		x7, \do8
			
 
				+	bl		__xts_crypt8
			
 
				+
			
 
				+	ldp		q16, q17, [sp, #16]
			
 
				+	ldp		q18, q19, [sp, #48]
			
 
				+
			
 
				+	eor		\o0\().16b, \o0\().16b, v25.16b
			
 
				+	eor		\o1\().16b, \o1\().16b, v26.16b
			
 
				+	eor		\o2\().16b, \o2\().16b, v27.16b
			
 
				+	eor		\o3\().16b, \o3\().16b, v28.16b
			
 
				+
			
 
				+	st1		{\o0\().16b}, [x0], #16
			
 
				+	mov		v25.16b, v26.16b
			
 
				+	tbnz		x6, #1, 1f
			
 
				+	st1		{\o1\().16b}, [x0], #16
			
 
				+	mov		v25.16b, v27.16b
			
 
				+	tbnz		x6, #2, 1f
			
 
				+	st1		{\o2\().16b}, [x0], #16
			
 
				+	mov		v25.16b, v28.16b
			
 
				+	tbnz		x6, #3, 1f
			
 
				+	st1		{\o3\().16b}, [x0], #16
			
 
				+	mov		v25.16b, v29.16b
			
 
				+	tbnz		x6, #4, 1f
			
 
				+
			
 
				+	eor		\o4\().16b, \o4\().16b, v16.16b
			
 
				+	eor		\o5\().16b, \o5\().16b, v17.16b
			
 
				+	eor		\o6\().16b, \o6\().16b, v18.16b
			
 
				+	eor		\o7\().16b, \o7\().16b, v19.16b
			
 
				+
			
 
				+	st1		{\o4\().16b}, [x0], #16
			
 
				+	tbnz		x6, #5, 1f
			
 
				+	st1		{\o5\().16b}, [x0], #16
			
 
				+	tbnz		x6, #6, 1f
			
 
				+	st1		{\o6\().16b}, [x0], #16
			
 
				+	tbnz		x6, #7, 1f
			
 
				+	st1		{\o7\().16b}, [x0], #16
			
 
				+
			
 
				+	cbnz		x4, 99b
			
 
				+
			
 
				+1:	st1		{v25.16b}, [x5]
			
 
				+	ldp		x29, x30, [sp], #80
			
 
				+	ret
			
 
				+	.endm
			
 
				+
			
 
				+ENTRY(aesbs_xts_encrypt)
			
 
				+	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
			
 
				+ENDPROC(aesbs_xts_encrypt)
			
 
				+
			
 
				+ENTRY(aesbs_xts_decrypt)
			
 
				+	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
			
 
				+ENDPROC(aesbs_xts_decrypt)
			
 
				+
			
 
				+	.macro		next_ctr, v
			
 
				+	mov		\v\().d[1], x8
			
 
				+	adds		x8, x8, #1
			
 
				+	mov		\v\().d[0], x7
			
 
				+	adc		x7, x7, xzr
			
 
				+	rev64		\v\().16b, \v\().16b
			
 
				+	.endm
			
 
				+
			
 
				+	/*
			
 
				+	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+	 *		     int rounds, int blocks, u8 iv[], bool final)
			
 
				+	 */
			
 
				+ENTRY(aesbs_ctr_encrypt)
			
 
				+	stp		x29, x30, [sp, #-16]!
			
 
				+	mov		x29, sp
			
 
				+
			
 
				+	add		x4, x4, x6		// do one extra block if final
			
 
				+
			
 
				+	ldp		x7, x8, [x5]
			
 
				+	ld1		{v0.16b}, [x5]
			
 
				+CPU_LE(	rev		x7, x7		)
			
 
				+CPU_LE(	rev		x8, x8		)
			
 
				+	adds		x8, x8, #1
			
 
				+	adc		x7, x7, xzr
			
 
				+
			
 
				+99:	mov		x9, #1
			
 
				+	lsl		x9, x9, x4
			
 
				+	subs		w4, w4, #8
			
 
				+	csel		x4, x4, xzr, pl
			
 
				+	csel		x9, x9, xzr, le
			
 
				+
			
 
				+	next_ctr	v1
			
 
				+	next_ctr	v2
			
 
				+	next_ctr	v3
			
 
				+	next_ctr	v4
			
 
				+	next_ctr	v5
			
 
				+	next_ctr	v6
			
 
				+	next_ctr	v7
			
 
				+
			
 
				+0:	mov		bskey, x2
			
 
				+	mov		rounds, x3
			
 
				+	bl		aesbs_encrypt8
			
 
				+
			
 
				+	lsr		x9, x9, x6		// disregard the extra block
			
 
				+	tbnz		x9, #0, 0f
			
 
				+
			
 
				+	ld1		{v8.16b}, [x1], #16
			
 
				+	eor		v0.16b, v0.16b, v8.16b
			
 
				+	st1		{v0.16b}, [x0], #16
			
 
				+	tbnz		x9, #1, 1f
			
 
				+
			
 
				+	ld1		{v9.16b}, [x1], #16
			
 
				+	eor		v1.16b, v1.16b, v9.16b
			
 
				+	st1		{v1.16b}, [x0], #16
			
 
				+	tbnz		x9, #2, 2f
			
 
				+
			
 
				+	ld1		{v10.16b}, [x1], #16
			
 
				+	eor		v4.16b, v4.16b, v10.16b
			
 
				+	st1		{v4.16b}, [x0], #16
			
 
				+	tbnz		x9, #3, 3f
			
 
				+
			
 
				+	ld1		{v11.16b}, [x1], #16
			
 
				+	eor		v6.16b, v6.16b, v11.16b
			
 
				+	st1		{v6.16b}, [x0], #16
			
 
				+	tbnz		x9, #4, 4f
			
 
				+
			
 
				+	ld1		{v12.16b}, [x1], #16
			
 
				+	eor		v3.16b, v3.16b, v12.16b
			
 
				+	st1		{v3.16b}, [x0], #16
			
 
				+	tbnz		x9, #5, 5f
			
 
				+
			
 
				+	ld1		{v13.16b}, [x1], #16
			
 
				+	eor		v7.16b, v7.16b, v13.16b
			
 
				+	st1		{v7.16b}, [x0], #16
			
 
				+	tbnz		x9, #6, 6f
			
 
				+
			
 
				+	ld1		{v14.16b}, [x1], #16
			
 
				+	eor		v2.16b, v2.16b, v14.16b
			
 
				+	st1		{v2.16b}, [x0], #16
			
 
				+	tbnz		x9, #7, 7f
			
 
				+
			
 
				+	ld1		{v15.16b}, [x1], #16
			
 
				+	eor		v5.16b, v5.16b, v15.16b
			
 
				+	st1		{v5.16b}, [x0], #16
			
 
				+
			
 
				+	next_ctr	v0
			
 
				+	cbnz		x4, 99b
			
 
				+
			
 
				+0:	st1		{v0.16b}, [x5]
			
 
				+8:	ldp		x29, x30, [sp], #16
			
 
				+	ret
			
 
				+
			
 
				+	/*
			
 
				+	 * If we are handling the tail of the input (x6 == 1), return the
			
 
				+	 * final keystream block back to the caller via the IV buffer.
			
 
				+	 */
			
 
				+1:	cbz		x6, 8b
			
 
				+	st1		{v1.16b}, [x5]
			
 
				+	b		8b
			
 
				+2:	cbz		x6, 8b
			
 
				+	st1		{v4.16b}, [x5]
			
 
				+	b		8b
			
 
				+3:	cbz		x6, 8b
			
 
				+	st1		{v6.16b}, [x5]
			
 
				+	b		8b
			
 
				+4:	cbz		x6, 8b
			
 
				+	st1		{v3.16b}, [x5]
			
 
				+	b		8b
			
 
				+5:	cbz		x6, 8b
			
 
				+	st1		{v7.16b}, [x5]
			
 
				+	b		8b
			
 
				+6:	cbz		x6, 8b
			
 
				+	st1		{v2.16b}, [x5]
			
 
				+	b		8b
			
 
				+7:	cbz		x6, 8b
			
 
				+	st1		{v5.16b}, [x5]
			
 
				+	b		8b
			
 
				+ENDPROC(aesbs_ctr_encrypt)
			
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -0,0 +1,420 @@
 
				+/*
			
 
				+ * Bit sliced AES using NEON instructions
			
 
				+ *
			
 
				+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2 as
			
 
				+ * published by the Free Software Foundation.
			
 
				+ */
			
 
				+
			
 
				+#include <asm/neon.h>
			
 
				+#include <crypto/aes.h>
			
 
				+#include <crypto/cbc.h>
			
 
				+#include <crypto/internal/simd.h>
			
 
				+#include <crypto/internal/skcipher.h>
			
 
				+#include <crypto/xts.h>
			
 
				+#include <linux/module.h>
			
 
				+
			
 
				+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
			
 
				+MODULE_LICENSE("GPL v2");
			
 
				+
			
 
				+MODULE_ALIAS_CRYPTO("ecb(aes)");
			
 
				+MODULE_ALIAS_CRYPTO("cbc(aes)");
			
 
				+MODULE_ALIAS_CRYPTO("ctr(aes)");
			
 
				+MODULE_ALIAS_CRYPTO("xts(aes)");
			
 
				+
			
 
				+asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
			
 
				+
			
 
				+asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks);
			
 
				+asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks);
			
 
				+
			
 
				+asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks, u8 iv[]);
			
 
				+
			
 
				+asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks, u8 iv[], bool final);
			
 
				+
			
 
				+asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks, u8 iv[]);
			
 
				+asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks, u8 iv[]);
			
 
				+
			
 
				+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
			
 
				+
			
 
				+struct aesbs_ctx {
			
 
				+	u8	rk[13 * (8 * AES_BLOCK_SIZE) + 32];
			
 
				+	int	rounds;
			
 
				+} __aligned(AES_BLOCK_SIZE);
			
 
				+
			
 
				+struct aesbs_cbc_ctx {
			
 
				+	struct aesbs_ctx	key;
			
 
				+	u32			enc[AES_MAX_KEYLENGTH_U32];
			
 
				+};
			
 
				+
			
 
				+struct aesbs_xts_ctx {
			
 
				+	struct aesbs_ctx	key;
			
 
				+	u32			twkey[AES_MAX_KEYLENGTH_U32];
			
 
				+};
			
 
				+
			
 
				+static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			
 
				+			unsigned int key_len)
			
 
				+{
			
 
				+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct crypto_aes_ctx rk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = crypto_aes_expand_key(&rk, in_key, key_len);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	ctx->rounds = 6 + key_len / 4;
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static int __ecb_crypt(struct skcipher_request *req,
			
 
				+		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks))
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct skcipher_walk walk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = skcipher_walk_virt(&walk, req, true);
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	while (walk.nbytes >= AES_BLOCK_SIZE) {
			
 
				+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
			
 
				+
			
 
				+		if (walk.nbytes < walk.total)
			
 
				+			blocks = round_down(blocks,
			
 
				+					    walk.stride / AES_BLOCK_SIZE);
			
 
				+
			
 
				+		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
			
 
				+		   ctx->rounds, blocks);
			
 
				+		err = skcipher_walk_done(&walk,
			
 
				+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
			
 
				+	}
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int ecb_encrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	return __ecb_crypt(req, aesbs_ecb_encrypt);
			
 
				+}
			
 
				+
			
 
				+static int ecb_decrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	return __ecb_crypt(req, aesbs_ecb_decrypt);
			
 
				+}
			
 
				+
			
 
				+static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			
 
				+			    unsigned int key_len)
			
 
				+{
			
 
				+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct crypto_aes_ctx rk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = crypto_aes_expand_key(&rk, in_key, key_len);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	ctx->key.rounds = 6 + key_len / 4;
			
 
				+
			
 
				+	memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return 0;
			
 
				+}
			
 
				+
			
 
				+static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
			
 
				+{
			
 
				+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+
			
 
				+	__aes_arm64_encrypt(ctx->enc, dst, src, ctx->key.rounds);
			
 
				+}
			
 
				+
			
 
				+static int cbc_encrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	return crypto_cbc_encrypt_walk(req, cbc_encrypt_one);
			
 
				+}
			
 
				+
			
 
				+static int cbc_decrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct skcipher_walk walk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = skcipher_walk_virt(&walk, req, true);
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	while (walk.nbytes >= AES_BLOCK_SIZE) {
			
 
				+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
			
 
				+
			
 
				+		if (walk.nbytes < walk.total)
			
 
				+			blocks = round_down(blocks,
			
 
				+					    walk.stride / AES_BLOCK_SIZE);
			
 
				+
			
 
				+		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				+				  ctx->key.rk, ctx->key.rounds, blocks,
			
 
				+				  walk.iv);
			
 
				+		err = skcipher_walk_done(&walk,
			
 
				+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
			
 
				+	}
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int ctr_encrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct skcipher_walk walk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = skcipher_walk_virt(&walk, req, true);
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	while (walk.nbytes > 0) {
			
 
				+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
			
 
				+		bool final = (walk.total % AES_BLOCK_SIZE) != 0;
			
 
				+
			
 
				+		if (walk.nbytes < walk.total) {
			
 
				+			blocks = round_down(blocks,
			
 
				+					    walk.stride / AES_BLOCK_SIZE);
			
 
				+			final = false;
			
 
				+		}
			
 
				+
			
 
				+		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			
 
				+				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
			
 
				+
			
 
				+		if (final) {
			
 
				+			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
			
 
				+			u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
			
 
				+
			
 
				+			if (dst != src)
			
 
				+				memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
			
 
				+			crypto_xor(dst, walk.iv, walk.total % AES_BLOCK_SIZE);
			
 
				+
			
 
				+			err = skcipher_walk_done(&walk, 0);
			
 
				+			break;
			
 
				+		}
			
 
				+		err = skcipher_walk_done(&walk,
			
 
				+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
			
 
				+	}
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			
 
				+			    unsigned int key_len)
			
 
				+{
			
 
				+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct crypto_aes_ctx rk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = xts_verify_key(tfm, in_key, key_len);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	key_len /= 2;
			
 
				+	err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
			
 
				+
			
 
				+	return aesbs_setkey(tfm, in_key, key_len);
			
 
				+}
			
 
				+
			
 
				+static int __xts_crypt(struct skcipher_request *req,
			
 
				+		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
			
 
				+				  int rounds, int blocks, u8 iv[]))
			
 
				+{
			
 
				+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
			
 
				+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
			
 
				+	struct skcipher_walk walk;
			
 
				+	int err;
			
 
				+
			
 
				+	err = skcipher_walk_virt(&walk, req, true);
			
 
				+
			
 
				+	__aes_arm64_encrypt(ctx->twkey, walk.iv, walk.iv, ctx->key.rounds);
			
 
				+
			
 
				+	kernel_neon_begin();
			
 
				+	while (walk.nbytes >= AES_BLOCK_SIZE) {
			
 
				+		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
			
 
				+
			
 
				+		if (walk.nbytes < walk.total)
			
 
				+			blocks = round_down(blocks,
			
 
				+					    walk.stride / AES_BLOCK_SIZE);
			
 
				+
			
 
				+		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
			
 
				+		   ctx->key.rounds, blocks, walk.iv);
			
 
				+		err = skcipher_walk_done(&walk,
			
 
				+					 walk.nbytes - blocks * AES_BLOCK_SIZE);
			
 
				+	}
			
 
				+	kernel_neon_end();
			
 
				+
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+static int xts_encrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	return __xts_crypt(req, aesbs_xts_encrypt);
			
 
				+}
			
 
				+
			
 
				+static int xts_decrypt(struct skcipher_request *req)
			
 
				+{
			
 
				+	return __xts_crypt(req, aesbs_xts_decrypt);
			
 
				+}
			
 
				+
			
 
				+static struct skcipher_alg aes_algs[] = { {
			
 
				+	.base.cra_name		= "__ecb(aes)",
			
 
				+	.base.cra_driver_name	= "__ecb-aes-neonbs",
			
 
				+	.base.cra_priority	= 250,
			
 
				+	.base.cra_blocksize	= AES_BLOCK_SIZE,
			
 
				+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
			
 
				+	.base.cra_module	= THIS_MODULE,
			
 
				+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
			
 
				+
			
 
				+	.min_keysize		= AES_MIN_KEY_SIZE,
			
 
				+	.max_keysize		= AES_MAX_KEY_SIZE,
			
 
				+	.walksize		= 8 * AES_BLOCK_SIZE,
			
 
				+	.setkey			= aesbs_setkey,
			
 
				+	.encrypt		= ecb_encrypt,
			
 
				+	.decrypt		= ecb_decrypt,
			
 
				+}, {
			
 
				+	.base.cra_name		= "__cbc(aes)",
			
 
				+	.base.cra_driver_name	= "__cbc-aes-neonbs",
			
 
				+	.base.cra_priority	= 250,
			
 
				+	.base.cra_blocksize	= AES_BLOCK_SIZE,
			
 
				+	.base.cra_ctxsize	= sizeof(struct aesbs_cbc_ctx),
			
 
				+	.base.cra_module	= THIS_MODULE,
			
 
				+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
			
 
				+
			
 
				+	.min_keysize		= AES_MIN_KEY_SIZE,
			
 
				+	.max_keysize		= AES_MAX_KEY_SIZE,
			
 
				+	.walksize		= 8 * AES_BLOCK_SIZE,
			
 
				+	.ivsize			= AES_BLOCK_SIZE,
			
 
				+	.setkey			= aesbs_cbc_setkey,
			
 
				+	.encrypt		= cbc_encrypt,
			
 
				+	.decrypt		= cbc_decrypt,
			
 
				+}, {
			
 
				+	.base.cra_name		= "__ctr(aes)",
			
 
				+	.base.cra_driver_name	= "__ctr-aes-neonbs",
			
 
				+	.base.cra_priority	= 250,
			
 
				+	.base.cra_blocksize	= 1,
			
 
				+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
			
 
				+	.base.cra_module	= THIS_MODULE,
			
 
				+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
			
 
				+
			
 
				+	.min_keysize		= AES_MIN_KEY_SIZE,
			
 
				+	.max_keysize		= AES_MAX_KEY_SIZE,
			
 
				+	.chunksize		= AES_BLOCK_SIZE,
			
 
				+	.walksize		= 8 * AES_BLOCK_SIZE,
			
 
				+	.ivsize			= AES_BLOCK_SIZE,
			
 
				+	.setkey			= aesbs_setkey,
			
 
				+	.encrypt		= ctr_encrypt,
			
 
				+	.decrypt		= ctr_encrypt,
			
 
				+}, {
			
 
				+	.base.cra_name		= "ctr(aes)",
			
 
				+	.base.cra_driver_name	= "ctr-aes-neonbs",
			
 
				+	.base.cra_priority	= 250 - 1,
			
 
				+	.base.cra_blocksize	= 1,
			
 
				+	.base.cra_ctxsize	= sizeof(struct aesbs_ctx),
			
 
				+	.base.cra_module	= THIS_MODULE,
			
 
				+
			
 
				+	.min_keysize		= AES_MIN_KEY_SIZE,
			
 
				+	.max_keysize		= AES_MAX_KEY_SIZE,
			
 
				+	.chunksize		= AES_BLOCK_SIZE,
			
 
				+	.walksize		= 8 * AES_BLOCK_SIZE,
			
 
				+	.ivsize			= AES_BLOCK_SIZE,
			
 
				+	.setkey			= aesbs_setkey,
			
 
				+	.encrypt		= ctr_encrypt,
			
 
				+	.decrypt		= ctr_encrypt,
			
 
				+}, {
			
 
				+	.base.cra_name		= "__xts(aes)",
			
 
				+	.base.cra_driver_name	= "__xts-aes-neonbs",
			
 
				+	.base.cra_priority	= 250,
			
 
				+	.base.cra_blocksize	= AES_BLOCK_SIZE,
			
 
				+	.base.cra_ctxsize	= sizeof(struct aesbs_xts_ctx),
			
 
				+	.base.cra_module	= THIS_MODULE,
			
 
				+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
			
 
				+
			
 
				+	.min_keysize		= 2 * AES_MIN_KEY_SIZE,
			
 
				+	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
			
 
				+	.walksize		= 8 * AES_BLOCK_SIZE,
			
 
				+	.ivsize			= AES_BLOCK_SIZE,
			
 
				+	.setkey			= aesbs_xts_setkey,
			
 
				+	.encrypt		= xts_encrypt,
			
 
				+	.decrypt		= xts_decrypt,
			
 
				+} };
			
 
				+
			
 
				+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
			
 
				+
			
 
				+static void aes_exit(void)
			
 
				+{
			
 
				+	int i;
			
 
				+
			
 
				+	for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
			
 
				+		if (aes_simd_algs[i])
			
 
				+			simd_skcipher_free(aes_simd_algs[i]);
			
 
				+
			
 
				+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
			
 
				+}
			
 
				+
			
 
				+static int __init aes_init(void)
			
 
				+{
			
 
				+	struct simd_skcipher_alg *simd;
			
 
				+	const char *basename;
			
 
				+	const char *algname;
			
 
				+	const char *drvname;
			
 
				+	int err;
			
 
				+	int i;
			
 
				+
			
 
				+	if (!(elf_hwcap & HWCAP_ASIMD))
			
 
				+		return -ENODEV;
			
 
				+
			
 
				+	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
			
 
				+	if (err)
			
 
				+		return err;
			
 
				+
			
 
				+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
			
 
				+		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
			
 
				+			continue;
			
 
				+
			
 
				+		algname = aes_algs[i].base.cra_name + 2;
			
 
				+		drvname = aes_algs[i].base.cra_driver_name + 2;
			
 
				+		basename = aes_algs[i].base.cra_driver_name;
			
 
				+		simd = simd_skcipher_create_compat(algname, drvname, basename);
			
 
				+		err = PTR_ERR(simd);
			
 
				+		if (IS_ERR(simd))
			
 
				+			goto unregister_simds;
			
 
				+
			
 
				+		aes_simd_algs[i] = simd;
			
 
				+	}
			
 
				+	return 0;
			
 
				+
			
 
				+unregister_simds:
			
 
				+	aes_exit();
			
 
				+	return err;
			
 
				+}
			
 
				+
			
 
				+module_init(aes_init);
			
 
				+module_exit(aes_exit);