Merge branch 'bpf-jit-cleanups'

Daniel Borkmann says:

====================
This series follows up mostly with some minor cleanups on top
of 'Move ld_abs/ld_ind to native BPF' as well as implements better
32/64 bit immediate load into register and saves tail call init on
cBPF for the arm64 JIT. Last but not least we add a couple of test
cases. For details please see individual patches. Thanks!

v1 -> v2:
  - Minor fix in i64_i16_blocks() to remove 24 shift.
  - Added last two patches.
  - Added Acks from prior round.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
commit fb40c9ddd6

+ 3 - 10
arch/arm/net/bpf_jit_32.c

@@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
 #define SCRATCH_SIZE 80
 
 /* total stack size used in JITed code */
-#define _STACK_SIZE \
-	(ctx->prog->aux->stack_depth + \
-	 + SCRATCH_SIZE + \
-	 + 4 /* extra for skb_copy_bits buffer */)
-
-#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+#define _STACK_SIZE	(ctx->prog->aux->stack_depth + SCRATCH_SIZE)
+#define STACK_SIZE	ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
 
 /* Get the offset of eBPF REGISTERs stored on scratch space. */
-#define STACK_VAR(off) (STACK_SIZE-off-4)
-
-/* Offset of skb_copy_bits buffer */
-#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
+#define STACK_VAR(off) (STACK_SIZE - off)
 
 #if __LINUX_ARM_ARCH__ < 7
 

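With the 4-byte skb_copy_bits buffer gone, the arm32 stack layout is simply the program's stack depth plus the fixed scratch area, rounded up to the stack alignment. A host-side sketch of the new macros (an illustration only, assuming STACK_ALIGNMENT is 8 as in the arm32 JIT, with a stand-in stack_depth):

#include <stdio.h>

/* Model of the new arm32 JIT stack macros; stack_depth stands in
 * for ctx->prog->aux->stack_depth (example value, for illustration). */
#define SCRATCH_SIZE	80
#define STACK_ALIGNMENT	8
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int stack_depth = 64;	/* example BPF stack depth */
	unsigned int stack_size = ALIGN(stack_depth + SCRATCH_SIZE,
					STACK_ALIGNMENT);

	/* STACK_VAR(off) now addresses scratch space without the old
	 * 4-byte skb buffer bias: STACK_SIZE - off instead of
	 * STACK_SIZE - off - 4. */
	printf("STACK_SIZE = %u, STACK_VAR(0) = %u\n",
	       stack_size, stack_size - 0);
	return 0;
}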
+ 69 - 46
arch/arm64/net/bpf_jit_comp.c

@@ -21,7 +21,6 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/printk.h>
-#include <linux/skbuff.h>
 #include <linux/slab.h>
 
 #include <asm/byteorder.h>
@@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
 	ctx->idx++;
 }
 
+static inline void emit_a64_mov_i(const int is64, const int reg,
+				  const s32 val, struct jit_ctx *ctx)
+{
+	u16 hi = val >> 16;
+	u16 lo = val & 0xffff;
+
+	if (hi & 0x8000) {
+		if (hi == 0xffff) {
+			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+		} else {
+			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+			if (lo != 0xffff)
+				emit(A64_MOVK(is64, reg, lo, 0), ctx);
+		}
+	} else {
+		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+		if (hi)
+			emit(A64_MOVK(is64, reg, hi, 16), ctx);
+	}
+}
+
+static int i64_i16_blocks(const u64 val, bool inverse)
+{
+	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
+}
+
 static inline void emit_a64_mov_i64(const int reg, const u64 val,
 				    struct jit_ctx *ctx)
 {
-	u64 tmp = val;
-	int shift = 0;
-
-	emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
-	tmp >>= 16;
-	shift += 16;
-	while (tmp) {
-		if (tmp & 0xffff)
-			emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
-		tmp >>= 16;
-		shift += 16;
+	u64 nrm_tmp = val, rev_tmp = ~val;
+	bool inverse;
+	int shift;
+
+	if (!(nrm_tmp >> 32))
+		return emit_a64_mov_i(0, reg, (u32)val, ctx);
+
+	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
+	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
+					  (fls64(nrm_tmp) - 1)), 16), 0);
+	if (inverse)
+		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
+	else
+		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+	shift -= 16;
+	while (shift >= 0) {
+		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
+			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+		shift -= 16;
 	}
 }
 
+/*
+ * This is an unoptimized 64 immediate emission used for BPF to BPF call
+ * addresses. It will always do a full 64 bit decomposition as otherwise
+ * more complexity in the last extra pass is required since we previously
+ * reserved 4 instructions for the address.
+ */
 static inline void emit_addr_mov_i64(const int reg, const u64 val,
 				     struct jit_ctx *ctx)
 {
@@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
 	}
 }
 
-static inline void emit_a64_mov_i(const int is64, const int reg,
-				  const s32 val, struct jit_ctx *ctx)
-{
-	u16 hi = val >> 16;
-	u16 lo = val & 0xffff;
-
-	if (hi & 0x8000) {
-		if (hi == 0xffff) {
-			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
-		} else {
-			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
-			emit(A64_MOVK(is64, reg, lo, 0), ctx);
-		}
-	} else {
-		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
-		if (hi)
-			emit(A64_MOVK(is64, reg, hi, 16), ctx);
-	}
-}
-
 static inline int bpf2a64_offset(int bpf_to, int bpf_from,
 				 const struct jit_ctx *ctx)
 {
@@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 /* Tail call offset to jump into */
 #define PROLOGUE_OFFSET 7
 
-static int build_prologue(struct jit_ctx *ctx)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
 	const struct bpf_prog *prog = ctx->prog;
 	const u8 r6 = bpf2a64[BPF_REG_6];
@@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
 	 *                        | ... | BPF prog stack
 	 *                        |     |
 	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
-	 *                        |RSVD | JIT scratchpad
+	 *                        |RSVD | padding
 	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
 	 *                        |     |
 	 *                        | ... | Function call stack
@@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
 	/* Set up BPF prog stack base register */
 	emit(A64_MOV(1, fp, A64_SP), ctx);
 
-	/* Initialize tail_call_cnt */
-	emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+	if (!ebpf_from_cbpf) {
+		/* Initialize tail_call_cnt */
+		emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
-	cur_offset = ctx->idx - idx0;
-	if (cur_offset != PROLOGUE_OFFSET) {
-		pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
-			    cur_offset, PROLOGUE_OFFSET);
-		return -1;
+		cur_offset = ctx->idx - idx0;
+		if (cur_offset != PROLOGUE_OFFSET) {
+			pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
+				    cur_offset, PROLOGUE_OFFSET);
+			return -1;
+		}
 	}
 
-	/* 4 byte extra for skb_copy_bits buffer */
-	ctx->stack_size = prog->aux->stack_depth + 4;
-	ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+	ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
 
 	/* Set up function call stack */
 	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -786,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header;
 	struct arm64_jit_data *jit_data;
+	bool was_classic = bpf_prog_was_classic(prog);
 	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
@@ -840,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		goto out_off;
 	}
 
-	if (build_prologue(&ctx)) {
+	if (build_prologue(&ctx, was_classic)) {
 		prog = orig_prog;
 		goto out_off;
 	}
@@ -863,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 skip_init_ctx:
 	ctx.idx = 0;
 
-	build_prologue(&ctx);
+	build_prologue(&ctx, was_classic);
 
 	if (build_body(&ctx)) {
 		bpf_jit_binary_free(header);

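The rewritten emit_a64_mov_i64() chooses between a MOVZ-based and a MOVN-based decomposition by counting how many 16-bit blocks each form would still have to patch with MOVK, while BPF-to-BPF call addresses keep the full 4-instruction form via emit_addr_mov_i64() so the reserved slots stay stable across the extra pass. A user-space sketch of the selection logic (a portable stand-in for the in-kernel helper, not the kernel code itself):

#include <stdint.h>
#include <stdio.h>

/* Count the 16-bit blocks that still need a MOVK after an initial
 * MOVZ (inverse == 0) or MOVN (inverse == 1) -- mirrors
 * i64_i16_blocks() from the arm64 JIT. */
static int i64_i16_blocks(uint64_t val, int inverse)
{
	int n = 0, shift;

	for (shift = 0; shift < 64; shift += 16)
		n += ((val >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000);
	return n;
}

int main(void)
{
	uint64_t val = 0xffffffffffff1234ULL;	/* mostly ones: MOVN wins */
	int movz_cost = i64_i16_blocks(val, 0);	/* blocks != 0x0000 */
	int movn_cost = i64_i16_blocks(val, 1);	/* blocks != 0xffff */

	printf("MOVZ path: %d insns, MOVN path: %d insn(s) -> start with %s\n",
	       movz_cost, movn_cost,
	       movn_cost < movz_cost ? "MOVN" : "MOVZ");
	return 0;
}

For this value the MOVZ path would cost four instructions while a single MOVN suffices, which is exactly the case the old always-MOVZ loop handled poorly.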
+ 0 - 26
arch/mips/net/ebpf_jit.c

@@ -95,7 +95,6 @@ enum reg_val_type {
  * struct jit_ctx - JIT context
  * @skf:		The sk_filter
  * @stack_size:		eBPF stack size
- * @tmp_offset:		eBPF $sp offset to 8-byte temporary memory
  * @idx:		Instruction index
  * @flags:		JIT flags
  * @offsets:		Instruction offsets
@@ -105,7 +104,6 @@ enum reg_val_type {
 struct jit_ctx {
 	const struct bpf_prog *skf;
 	int stack_size;
-	int tmp_offset;
 	u32 idx;
 	u32 flags;
 	u32 *offsets;
@@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
 	locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
 
 	stack_adjust += locals_size;
-	ctx->tmp_offset = locals_size;
 
 	ctx->stack_size = stack_adjust;
 
@@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
 		emit_instr(ctx, lui, reg, upper >> 16);
 		emit_instr(ctx, addiu, reg, reg, lower);
 	}
-
 }
 
 static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
@@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	return 0;
 }
 
-static void * __must_check
-ool_skb_header_pointer(const struct sk_buff *skb, int offset,
-		       int len, void *buffer)
-{
-	return skb_header_pointer(skb, offset, len, buffer);
-}
-
-static int size_to_len(const struct bpf_insn *insn)
-{
-	switch (BPF_SIZE(insn->code)) {
-	case BPF_B:
-		return 1;
-	case BPF_H:
-		return 2;
-	case BPF_W:
-		return 4;
-	case BPF_DW:
-		return 8;
-	}
-	return 0;
-}
-
 static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
 {
 	if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {

+ 0 - 1
arch/sparc/net/bpf_jit_comp_64.c

@@ -894,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	const int i = insn - ctx->prog->insnsi;
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
-	u32 *func;
 
 	if (insn->src_reg == BPF_REG_FP)
 		ctx->saw_frame_pointer = true;

+ 14 - 15
arch/x86/include/asm/nospec-branch.h

@@ -301,9 +301,9 @@ do {									\
  *    jmp *%edx for x86_32
  */
 #ifdef CONFIG_RETPOLINE
-#ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE	17
-# define RETPOLINE_RAX_BPF_JIT()				\
+# ifdef CONFIG_X86_64
+#  define RETPOLINE_RAX_BPF_JIT_SIZE	17
+#  define RETPOLINE_RAX_BPF_JIT()				\
 do {								\
 	EMIT1_off32(0xE8, 7);	 /* callq do_rop */		\
 	/* spec_trap: */					\
@@ -314,8 +314,8 @@ do {								\
 	EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */	\
 	EMIT1(0xC3);             /* retq */			\
 } while (0)
-#else
-# define RETPOLINE_EDX_BPF_JIT()				\
+# else /* !CONFIG_X86_64 */
+#  define RETPOLINE_EDX_BPF_JIT()				\
 do {								\
 	EMIT1_off32(0xE8, 7);	 /* call do_rop */		\
 	/* spec_trap: */					\
@@ -326,17 +326,16 @@ do {								\
 	EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */		\
 	EMIT1(0xC3);             /* ret */			\
 } while (0)
-#endif
+# endif
 #else /* !CONFIG_RETPOLINE */
-
-#ifdef CONFIG_X86_64
-# define RETPOLINE_RAX_BPF_JIT_SIZE	2
-# define RETPOLINE_RAX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE0);	 /* jmp *%rax */
-#else
-# define RETPOLINE_EDX_BPF_JIT()				\
-	EMIT2(0xFF, 0xE2) /* jmp *%edx */
-#endif
+# ifdef CONFIG_X86_64
+#  define RETPOLINE_RAX_BPF_JIT_SIZE	2
+#  define RETPOLINE_RAX_BPF_JIT()				\
+	EMIT2(0xFF, 0xE0);       /* jmp *%rax */
+# else /* !CONFIG_X86_64 */
+#  define RETPOLINE_EDX_BPF_JIT()				\
+	EMIT2(0xFF, 0xE2)        /* jmp *%edx */
+# endif
 #endif
 
 #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */

+ 80 - 0
tools/testing/selftests/bpf/bpf_rand.h

@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+	return (((uint64_t)(uint32_t)rand()) |
+	        ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m)			\
+static inline uint64_t bpf_rand_u##x(int shift)	\
+{						\
+	return bpf_rand_mask((m)) << shift;	\
+}
+
+bpf_rand_ux( 8,               0xffULL)
+bpf_rand_ux(16,             0xffffULL)
+bpf_rand_ux(24,           0xffffffULL)
+bpf_rand_ux(32,         0xffffffffULL)
+bpf_rand_ux(40,       0xffffffffffULL)
+bpf_rand_ux(48,     0xffffffffffffULL)
+bpf_rand_ux(56,   0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+	srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+	switch (rand() % 39) {
+	case  0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
+	case  1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
+	case  2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+	case  3: return 0x8000000000000000ULL | bpf_rand_u32(0);
+	case  4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
+	case  5: return 0x0000000100000000ULL | bpf_rand_u24(0);
+	case  6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
+	case  7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+	case  8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+	case  9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+	case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+	case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+	case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+	case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+	case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+	case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+	case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+	case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+	case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+	case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+	case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+	case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+	case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+	case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+	case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+	case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+	case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+	case 27: return 0x0000800000000000ULL;
+	case 28: return 0x8000000000000000ULL;
+	case 29: return 0x0000000000000000ULL;
+	case 30: return 0xffffffffffffffffULL;
+	case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+	case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+	case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+	case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+	case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+	case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+	case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+	default: return bpf_rand_u64(0);
+	}
+}
+
+#endif /* __BPF_RAND__ */

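The header is self-contained and only needs seeding once per process. A minimal standalone consumer (a hypothetical test program, assuming bpf_rand.h is on the include path) might look like:

#include <stdio.h>

#include "bpf_rand.h"

int main(void)
{
	int i;

	bpf_semi_rand_init();
	/* Each draw falls into one of the 39 pattern classes above,
	 * exercising different MOVZ/MOVN/MOVK block counts in a JIT. */
	for (i = 0; i < 4; i++)
		printf("%#018llx\n",
		       (unsigned long long)bpf_semi_rand_get());
	return 0;
}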
+ 62 - 0
tools/testing/selftests/bpf/test_verifier.c

@@ -41,6 +41,7 @@
 # endif
 #endif
 #include "bpf_rlimit.h"
+#include "bpf_rand.h"
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
@@ -152,6 +153,30 @@ static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 	insn[i] = BPF_EXIT_INSN();
 }
 
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->insns;
+	uint64_t res = 0;
+	int i = 0;
+
+	insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+	while (i < self->retval) {
+		uint64_t val = bpf_semi_rand_get();
+		struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+		res ^= val;
+		insn[i++] = tmp[0];
+		insn[i++] = tmp[1];
+		insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+	}
+	insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+	insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+	insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+	insn[i] = BPF_EXIT_INSN();
+	res ^= (res >> 32);
+	self->retval = (uint32_t)res;
+}
+
 static struct bpf_test tests[] = {
 	{
 		"add+sub+mul",
@@ -11974,6 +11999,42 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.retval = 10,
 	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 1",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 4090,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 2",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2047,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 3",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 511,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 4",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 5,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
@@ -12346,5 +12407,6 @@ int main(int argc, char **argv)
 		return EXIT_FAILURE;
 	}
 
+	bpf_semi_rand_init();
 	return do_test(unpriv, from, to);
 }
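
One subtlety worth noting: in bpf_fill_rand_ld_dw() the test's .retval field does double duty. On entry it bounds how many instructions are generated (4090, 2047, 511, 5 in the cases above), and before returning it is overwritten with the expected program result: the XOR of all loaded immediates, folded to 32 bits. A host-side sketch of that fold (illustrative only, with made-up input values):

#include <stdint.h>
#include <stdio.h>

/* Fold a stream of 64-bit immediates the same way the generated
 * BPF program does: XOR them together, then XOR the high half
 * into the low half so the 32-bit return value covers all bits. */
static uint32_t fold_ld_dw(const uint64_t *vals, int n)
{
	uint64_t res = 0;
	int i;

	for (i = 0; i < n; i++)
		res ^= vals[i];
	res ^= res >> 32;
	return (uint32_t)res;
}

int main(void)
{
	uint64_t vals[] = { 0x8000000000000000ULL, 0x00000000ffff0000ULL };

	printf("expected retval = %u\n", fold_ld_dw(vals, 2));
	return 0;
}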