
Merge branch 'bpf-misc-updates'

Daniel Borkmann says:

====================
Misc BPF updates

This series contains a couple of misc updates to the BPF code, among
other things a new helper bpf_skb_load_bytes() and a move of the A/X
register clearing into the classic-to-eBPF converter. Please see the
individual patches for details.

Thanks!
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed 10 years ago
commit d73e5f4146

arch/arm64/net/bpf_jit_comp.c | +0 -6

@@ -152,8 +152,6 @@ static void build_prologue(struct jit_ctx *ctx)
 	const u8 r8 = bpf2a64[BPF_REG_8];
 	const u8 r9 = bpf2a64[BPF_REG_9];
 	const u8 fp = bpf2a64[BPF_REG_FP];
-	const u8 ra = bpf2a64[BPF_REG_A];
-	const u8 rx = bpf2a64[BPF_REG_X];
 	const u8 tmp1 = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
 
@@ -200,10 +198,6 @@ static void build_prologue(struct jit_ctx *ctx)
 
 	/* Set up function call stack */
 	emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
-
-	/* Clear registers A and X */
-	emit_a64_mov_i64(ra, 0, ctx);
-	emit_a64_mov_i64(rx, 0, ctx);
 }
 
 static void build_epilogue(struct jit_ctx *ctx)
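
The clearing removed here (and from the s390/x86 JITs and the interpreter below) is not lost: the classic-to-eBPF converter in net/core/filter.c, see the hunk near the end of this series, now emits it as the first two instructions of every converted program, so native eBPF programs no longer pay for registers they never use. Written with the converter's own macros, the emitted prologue looks roughly like this (a sketch collected from the filter.c hunk, not a literal excerpt):

	BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A),	/* A = 0 */
	BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X),	/* X = 0 */
	BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1),	/* ctx = skb/seccomp data */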

arch/s390/net/bpf_jit_comp.c | +2 -11

@@ -408,7 +408,7 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit)
  * Save registers and create stack frame if necessary.
  * See stack frame layout description in "bpf_jit.h"!
  */
-static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic)
+static void bpf_jit_prologue(struct bpf_jit *jit)
 {
 	if (jit->seen & SEEN_TAIL_CALL) {
 		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
@@ -448,15 +448,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, bool is_classic)
 		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
 			      STK_OFF_SKBP);
-	/* Clear A (%b0) and X (%b7) registers for converted BPF programs */
-	if (is_classic) {
-		if (REG_SEEN(BPF_REG_A))
-			/* lghi %ba,0 */
-			EMIT4_IMM(0xa7090000, BPF_REG_A, 0);
-		if (REG_SEEN(BPF_REG_X))
-			/* lghi %bx,0 */
-			EMIT4_IMM(0xa7090000, BPF_REG_X, 0);
-	}
 }
 
 /*
@@ -1245,7 +1236,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
 	jit->lit = jit->lit_start;
 	jit->prg = 0;
 
-	bpf_jit_prologue(jit, bpf_prog_was_classic(fp));
+	bpf_jit_prologue(jit);
 	for (i = 0; i < fp->len; i += insn_count) {
 		insn_count = bpf_jit_insn(jit, fp, i);
 		if (insn_count < 0)

arch/x86/net/bpf_jit_comp.c | +35 -5

@@ -193,7 +193,7 @@ struct jit_context {
 	 32 /* space for rbx, r13, r14, r15 */ + \
 	 8 /* space for skb_copy_bits() buffer */)
 
-#define PROLOGUE_SIZE 51
+#define PROLOGUE_SIZE 48
 
 /* emit x64 prologue code for BPF program and check its size.
  * bpf_tail_call helper will skip it while jumping into another program
@@ -229,11 +229,15 @@ static void emit_prologue(u8 **pprog)
 	/* mov qword ptr [rbp-X],r15 */
 	EMIT3_off32(0x4C, 0x89, 0xBD, -STACKSIZE + 24);
 
-	/* clear A and X registers */
-	EMIT2(0x31, 0xc0); /* xor eax, eax */
-	EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
+	/* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
+	 * we need to reset the counter to 0. It's done in two instructions,
+	 * resetting the rax register to 0 (xor on eax gets zero-extended),
+	 * and moving it to the counter location.
+	 */
 
-	/* clear tail_cnt: mov qword ptr [rbp-X], rax */
+	/* xor eax, eax */
+	EMIT2(0x31, 0xc0);
+	/* mov qword ptr [rbp-X], rax */
 	EMIT3_off32(0x48, 0x89, 0x85, -STACKSIZE + 32);
 
 	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
@@ -455,6 +459,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			}
 
 		case BPF_ALU | BPF_MOV | BPF_K:
+			/* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
+			 * to save 3 bytes.
+			 */
+			if (imm32 == 0) {
+				if (is_ereg(dst_reg))
+					EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+				b2 = 0x31; /* xor */
+				b3 = 0xC0;
+				EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+				break;
+			}
+
 			/* mov %eax, imm32 */
 			if (is_ereg(dst_reg))
 				EMIT1(add_1mod(0x40, dst_reg));
@@ -469,6 +485,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 				return -EINVAL;
 			}
 
+			/* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
+			 * to save 7 bytes.
+			 */
+			if (insn[0].imm == 0 && insn[1].imm == 0) {
+				b1 = add_2mod(0x48, dst_reg, dst_reg);
+				b2 = 0x31; /* xor */
+				b3 = 0xC0;
+				EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
+
+				insn++;
+				i++;
+				break;
+			}
+
 			/* movabsq %rax, imm64 */
 			EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
 			EMIT(insn[0].imm, 4);
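
A rough illustration of the savings (a sketch, not output emitted by the patch), assuming the destination is BPF R1, which this JIT maps to %rdi; extended registers (r8-r15) would additionally carry a REX prefix:

	BPF_ALU32_IMM(BPF_MOV, R1, 0):
	  before:  bf 00 00 00 00                 mov    $0x0,%edi    (5 bytes)
	  after:   31 ff                          xor    %edi,%edi    (2 bytes)

	BPF_LD_IMM64(R1, 0):
	  before:  48 bf 00 00 00 00 00 00 00 00  movabs $0x0,%rdi   (10 bytes)
	  after:   48 31 ff                       xor    %rdi,%rdi    (3 bytes)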

include/uapi/linux/bpf.h | +1 -0

@@ -269,6 +269,7 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_perf_event_output,
+	BPF_FUNC_skb_load_bytes,
 	__BPF_FUNC_MAX_ID,
 };
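
This hunk only adds the function id; a doc-comment in the style this header uses for the other helpers could read roughly as follows (wording assumed, based on the implementation in net/core/filter.c further below, not part of this hunk):

	/* int bpf_skb_load_bytes(skb, offset, to, len)
	 *     Copy len bytes from the packet, starting at offset, into the
	 *     stack buffer to; len is bounded by BPF_LDST_LEN (16) and
	 *     offset by 0xffff.
	 *     Return: 0 on success, negative error (-EFAULT) otherwise
	 */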
 

kernel/bpf/core.c | +0 -4

@@ -306,10 +306,6 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
 	ARG1 = (u64) (unsigned long) ctx;
 
-	/* Registers used in classic BPF programs need to be reset first. */
-	regs[BPF_REG_A] = 0;
-	regs[BPF_REG_X] = 0;
-
 select_insn:
 	goto *jumptable[insn->code];
 

lib/test_bpf.c | +120 -0

@@ -1685,6 +1685,126 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 0x35d97ef2 } }
 	},
+	{	/* Mainly checking JIT here. */
+		"MOV REG64",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xffffffffffffffffLL),
+			BPF_MOV64_REG(R1, R0),
+			BPF_MOV64_REG(R2, R1),
+			BPF_MOV64_REG(R3, R2),
+			BPF_MOV64_REG(R4, R3),
+			BPF_MOV64_REG(R5, R4),
+			BPF_MOV64_REG(R6, R5),
+			BPF_MOV64_REG(R7, R6),
+			BPF_MOV64_REG(R8, R7),
+			BPF_MOV64_REG(R9, R8),
+			BPF_ALU64_IMM(BPF_MOV, R0, 0),
+			BPF_ALU64_IMM(BPF_MOV, R1, 0),
+			BPF_ALU64_IMM(BPF_MOV, R2, 0),
+			BPF_ALU64_IMM(BPF_MOV, R3, 0),
+			BPF_ALU64_IMM(BPF_MOV, R4, 0),
+			BPF_ALU64_IMM(BPF_MOV, R5, 0),
+			BPF_ALU64_IMM(BPF_MOV, R6, 0),
+			BPF_ALU64_IMM(BPF_MOV, R7, 0),
+			BPF_ALU64_IMM(BPF_MOV, R8, 0),
+			BPF_ALU64_IMM(BPF_MOV, R9, 0),
+			BPF_ALU64_REG(BPF_ADD, R0, R0),
+			BPF_ALU64_REG(BPF_ADD, R0, R1),
+			BPF_ALU64_REG(BPF_ADD, R0, R2),
+			BPF_ALU64_REG(BPF_ADD, R0, R3),
+			BPF_ALU64_REG(BPF_ADD, R0, R4),
+			BPF_ALU64_REG(BPF_ADD, R0, R5),
+			BPF_ALU64_REG(BPF_ADD, R0, R6),
+			BPF_ALU64_REG(BPF_ADD, R0, R7),
+			BPF_ALU64_REG(BPF_ADD, R0, R8),
+			BPF_ALU64_REG(BPF_ADD, R0, R9),
+			BPF_ALU64_IMM(BPF_ADD, R0, 0xfefe),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xfefe } }
+	},
+	{	/* Mainly checking JIT here. */
+		"MOV REG32",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xffffffffffffffffLL),
+			BPF_MOV64_REG(R1, R0),
+			BPF_MOV64_REG(R2, R1),
+			BPF_MOV64_REG(R3, R2),
+			BPF_MOV64_REG(R4, R3),
+			BPF_MOV64_REG(R5, R4),
+			BPF_MOV64_REG(R6, R5),
+			BPF_MOV64_REG(R7, R6),
+			BPF_MOV64_REG(R8, R7),
+			BPF_MOV64_REG(R9, R8),
+			BPF_ALU32_IMM(BPF_MOV, R0, 0),
+			BPF_ALU32_IMM(BPF_MOV, R1, 0),
+			BPF_ALU32_IMM(BPF_MOV, R2, 0),
+			BPF_ALU32_IMM(BPF_MOV, R3, 0),
+			BPF_ALU32_IMM(BPF_MOV, R4, 0),
+			BPF_ALU32_IMM(BPF_MOV, R5, 0),
+			BPF_ALU32_IMM(BPF_MOV, R6, 0),
+			BPF_ALU32_IMM(BPF_MOV, R7, 0),
+			BPF_ALU32_IMM(BPF_MOV, R8, 0),
+			BPF_ALU32_IMM(BPF_MOV, R9, 0),
+			BPF_ALU64_REG(BPF_ADD, R0, R0),
+			BPF_ALU64_REG(BPF_ADD, R0, R1),
+			BPF_ALU64_REG(BPF_ADD, R0, R2),
+			BPF_ALU64_REG(BPF_ADD, R0, R3),
+			BPF_ALU64_REG(BPF_ADD, R0, R4),
+			BPF_ALU64_REG(BPF_ADD, R0, R5),
+			BPF_ALU64_REG(BPF_ADD, R0, R6),
+			BPF_ALU64_REG(BPF_ADD, R0, R7),
+			BPF_ALU64_REG(BPF_ADD, R0, R8),
+			BPF_ALU64_REG(BPF_ADD, R0, R9),
+			BPF_ALU64_IMM(BPF_ADD, R0, 0xfefe),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xfefe } }
+	},
+	{	/* Mainly checking JIT here. */
+		"LD IMM64",
+		.u.insns_int = {
+			BPF_LD_IMM64(R0, 0xffffffffffffffffLL),
+			BPF_MOV64_REG(R1, R0),
+			BPF_MOV64_REG(R2, R1),
+			BPF_MOV64_REG(R3, R2),
+			BPF_MOV64_REG(R4, R3),
+			BPF_MOV64_REG(R5, R4),
+			BPF_MOV64_REG(R6, R5),
+			BPF_MOV64_REG(R7, R6),
+			BPF_MOV64_REG(R8, R7),
+			BPF_MOV64_REG(R9, R8),
+			BPF_LD_IMM64(R0, 0x0LL),
+			BPF_LD_IMM64(R1, 0x0LL),
+			BPF_LD_IMM64(R2, 0x0LL),
+			BPF_LD_IMM64(R3, 0x0LL),
+			BPF_LD_IMM64(R4, 0x0LL),
+			BPF_LD_IMM64(R5, 0x0LL),
+			BPF_LD_IMM64(R6, 0x0LL),
+			BPF_LD_IMM64(R7, 0x0LL),
+			BPF_LD_IMM64(R8, 0x0LL),
+			BPF_LD_IMM64(R9, 0x0LL),
+			BPF_ALU64_REG(BPF_ADD, R0, R0),
+			BPF_ALU64_REG(BPF_ADD, R0, R1),
+			BPF_ALU64_REG(BPF_ADD, R0, R2),
+			BPF_ALU64_REG(BPF_ADD, R0, R3),
+			BPF_ALU64_REG(BPF_ADD, R0, R4),
+			BPF_ALU64_REG(BPF_ADD, R0, R5),
+			BPF_ALU64_REG(BPF_ADD, R0, R6),
+			BPF_ALU64_REG(BPF_ADD, R0, R7),
+			BPF_ALU64_REG(BPF_ADD, R0, R8),
+			BPF_ALU64_REG(BPF_ADD, R0, R9),
+			BPF_ALU64_IMM(BPF_ADD, R0, 0xfefe),
+			BPF_EXIT_INSN(),
+		},
+		INTERNAL,
+		{ },
+		{ { 0, 0xfefe } }
+	},
 	{
 		"INT: ALU MIX",
 		.u.insns_int = {

net/core/filter.c | +50 -10

@@ -348,12 +348,6 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
  *    jump offsets, 2nd pass remapping:
  *   new_prog = kmalloc(sizeof(struct bpf_insn) * new_len);
  *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
- *
- * User BPF's register A is mapped to our BPF register 6, user BPF
- * register X is mapped to BPF register 7; frame pointer is always
- * register 10; Context 'void *ctx' is stored in register 1, that is,
- * for socket filters: ctx == 'struct sk_buff *', for seccomp:
- * ctx == 'struct seccomp_data *'.
  */
 static int bpf_convert_filter(struct sock_filter *prog, int len,
 			      struct bpf_insn *new_prog, int *new_len)
@@ -381,9 +375,22 @@ do_pass:
 	new_insn = new_prog;
 	fp = prog;
 
-	if (new_insn)
-		*new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
-	new_insn++;
+	/* Classic BPF related prologue emission. */
+	if (new_insn) {
+		/* Classic BPF expects A and X to be reset first. These need
+		 * to be guaranteed to be the first two instructions.
+		 */
+		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+
+		/* All programs must keep CTX in callee saved BPF_REG_CTX.
+		 * In the eBPF case it's done by the compiler; here we need
+		 * to do it ourselves. Initial CTX is present in BPF_REG_ARG1.
+		 */
+		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
+	} else {
+		new_insn += 3;
+	}
 
 	for (i = 0; i < len; fp++, i++) {
 		struct bpf_insn tmp_insns[6] = { };
@@ -1245,6 +1252,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 }
 
 #define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
+#define BPF_LDST_LEN			16U
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
@@ -1252,7 +1260,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 	int offset = (int) r2;
 	void *from = (void *) (long) r3;
 	unsigned int len = (unsigned int) r4;
-	char buf[16];
+	char buf[BPF_LDST_LEN];
 	void *ptr;
 
 	/* bpf verifier guarantees that:
@@ -1299,6 +1307,36 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
+	int offset = (int) r2;
+	void *to = (void *)(unsigned long) r3;
+	unsigned int len = (unsigned int) r4;
+	void *ptr;
+
+	if (unlikely((u32) offset > 0xffff || len > BPF_LDST_LEN))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, len, to);
+	if (unlikely(!ptr))
+		return -EFAULT;
+	if (ptr != to)
+		memcpy(to, ptr, len);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_load_bytes_proto = {
+	.func		= bpf_skb_load_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_STACK,
+	.arg4_type	= ARG_CONST_STACK_SIZE,
+};
+
 #define BPF_HEADER_FIELD_SIZE(flags)	((flags) & 0x0f)
 #define BPF_IS_PSEUDO_HEADER(flags)	((flags) & 0x10)
 
@@ -1654,6 +1692,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	switch (func_id) {
 	case BPF_FUNC_skb_store_bytes:
 		return &bpf_skb_store_bytes_proto;
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
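
Finally, a hedged usage sketch of the new helper from a tc classifier in restricted C (not part of this patch; the section name, the program name and the function-pointer declaration of the helper follow the usual samples/bpf convention and are assumptions of the example):

	#include <linux/bpf.h>

	/* Helper declaration by function id, samples/bpf style (assumption) */
	static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
		(void *) BPF_FUNC_skb_load_bytes;

	__attribute__((section("classifier"), used))
	int cls_ipv4(struct __sk_buff *skb)
	{
		unsigned char eth[14];	/* Ethernet header, fits in BPF_LDST_LEN */

		/* The helper uses skb_header_pointer(), so it also copies
		 * from non-linear skb data when needed.
		 */
		if (bpf_skb_load_bytes(skb, 0, eth, sizeof(eth)) < 0)
			return 0;

		/* EtherType sits in the last two bytes of the header */
		return eth[12] == 0x08 && eth[13] == 0x00;	/* ETH_P_IP */
	}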