@@ -17,15 +17,6 @@
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 
-/*
- * Assembly code in arch/x86/net/bpf_jit.S
- */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[];
-extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[];
-
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
 	if (len == 1)
@@ -107,9 +98,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 #define X86_JLE 0x7E
 #define X86_JG 0x7F
 
-#define CHOOSE_LOAD_FUNC(K, func) \
-	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
 
@@ -120,8 +108,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
  * register in load/store instructions, it always needs an
  * extra byte of encoding and is callee saved.
  *
- * R9 caches skb->len - skb->data_len
- * R10 caches skb->data, and used for blinding (if enabled)
+ * Also x86-64 register R9 is unused. x86-64 register R10 is
+ * used for blinding (if enabled).
  */
 static const int reg2hex[] = {
 	[BPF_REG_0] = 0, /* RAX */
@@ -196,19 +184,15 @@ static void jit_fill_hole(void *area, unsigned int size)
 
 struct jit_context {
 	int cleanup_addr; /* Epilogue code offset */
-	bool seen_ld_abs;
-	bool seen_ax_reg;
 };
 
 /* Maximum number of bytes emitted while JITing one eBPF insn */
 #define BPF_MAX_INSN_SIZE	128
 #define BPF_INSN_SAFETY		64
 
-#define AUX_STACK_SPACE \
-	(32 /* Space for RBX, R13, R14, R15 */ + \
-	  8 /* Space for skb_copy_bits() buffer */)
+#define AUX_STACK_SPACE	40 /* Space for RBX, R13, R14, R15, tailcnt */
 
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE	37
 
 /*
  * Emit x86-64 prologue code for BPF program and check its size.
@@ -232,20 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 	/* sub rbp, AUX_STACK_SPACE */
 	EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
 
-	/* All classic BPF filters use R6(rbx) save it */
-
 	/* mov qword ptr [rbp+0],rbx */
 	EMIT4(0x48, 0x89, 0x5D, 0);
-
-	/*
-	 * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
-	 * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
-	 * R8(R14). R9(R15) spill could be made conditional, but there is only
-	 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
-	 * The overhead of extra spill is negligible for any filter other
-	 * than synthetic ones. Therefore not worth adding complexity.
-	 */
-
 	/* mov qword ptr [rbp+8],r13 */
 	EMIT4(0x4C, 0x89, 0x6D, 8);
 	/* mov qword ptr [rbp+16],r14 */
@@ -353,27 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
 	*pprog = prog;
 }
 
-
-static void emit_load_skb_data_hlen(u8 **pprog)
-{
-	u8 *prog = *pprog;
-	int cnt = 0;
-
-	/*
-	 * r9d = skb->len - skb->data_len (headlen)
-	 * r10 = skb->data
-	 */
-	/* mov %r9d, off32(%rdi) */
-	EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
-
-	/* sub %r9d, off32(%rdi) */
-	EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
-
-	/* mov %r10, off32(%rdi) */
-	EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
-	*pprog = prog;
-}
-
 static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
 			   u32 dst_reg, const u32 imm32)
 {
@@ -462,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 {
 	struct bpf_insn *insn = bpf_prog->insnsi;
 	int insn_cnt = bpf_prog->len;
-	bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
-	bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
 	int i, cnt = 0;
@@ -473,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	emit_prologue(&prog, bpf_prog->aux->stack_depth,
 		      bpf_prog_was_classic(bpf_prog));
 
-	if (seen_ld_abs)
-		emit_load_skb_data_hlen(&prog);
-
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		const s32 imm32 = insn->imm;
 		u32 dst_reg = insn->dst_reg;
@@ -483,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		u8 b2 = 0, b3 = 0;
 		s64 jmp_offset;
 		u8 jmp_cond;
-		bool reload_skb_data;
 		int ilen;
 		u8 *func;
 
-		if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
-			ctx->seen_ax_reg = seen_ax_reg = true;
-
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
@@ -916,36 +858,12 @@ xadd: if (is_imm8(insn->off))
 		case BPF_JMP | BPF_CALL:
 			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
-			if (seen_ld_abs) {
-				reload_skb_data = bpf_helper_changes_pkt_data(func);
-				if (reload_skb_data) {
-					EMIT1(0x57); /* push %rdi */
-					jmp_offset += 22; /* pop, mov, sub, mov */
-				} else {
-					EMIT2(0x41, 0x52); /* push %r10 */
-					EMIT2(0x41, 0x51); /* push %r9 */
-					/*
-					 * We need to adjust jmp offset, since
-					 * pop %r9, pop %r10 take 4 bytes after call insn
-					 */
-					jmp_offset += 4;
-				}
-			}
 			if (!imm32 || !is_simm32(jmp_offset)) {
 				pr_err("unsupported BPF func %d addr %p image %p\n",
 				       imm32, func, image);
 				return -EINVAL;
 			}
 			EMIT1_off32(0xE8, jmp_offset);
-			if (seen_ld_abs) {
-				if (reload_skb_data) {
-					EMIT1(0x5F); /* pop %rdi */
-					emit_load_skb_data_hlen(&prog);
-				} else {
-					EMIT2(0x41, 0x59); /* pop %r9 */
-					EMIT2(0x41, 0x5A); /* pop %r10 */
-				}
-			}
 			break;
 
 		case BPF_JMP | BPF_TAIL_CALL:
@@ -1080,60 +998,6 @@ emit_jmp:
 		}
 		break;
 
-		case BPF_LD | BPF_IND | BPF_W:
-			func = sk_load_word;
-			goto common_load;
-		case BPF_LD | BPF_ABS | BPF_W:
-			func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
-common_load:
-			ctx->seen_ld_abs = seen_ld_abs = true;
-			jmp_offset = func - (image + addrs[i]);
-			if (!func || !is_simm32(jmp_offset)) {
-				pr_err("unsupported BPF func %d addr %p image %p\n",
-				       imm32, func, image);
-				return -EINVAL;
-			}
-			if (BPF_MODE(insn->code) == BPF_ABS) {
-				/* mov %esi, imm32 */
-				EMIT1_off32(0xBE, imm32);
-			} else {
-				/* mov %rsi, src_reg */
-				EMIT_mov(BPF_REG_2, src_reg);
-				if (imm32) {
-					if (is_imm8(imm32))
-						/* add %esi, imm8 */
-						EMIT3(0x83, 0xC6, imm32);
-					else
-						/* add %esi, imm32 */
-						EMIT2_off32(0x81, 0xC6, imm32);
-				}
-			}
-			/*
-			 * skb pointer is in R6 (%rbx), it will be copied into
-			 * %rdi if skb_copy_bits() call is necessary.
-			 * sk_load_* helpers also use %r10 and %r9d.
-			 * See bpf_jit.S
-			 */
-			if (seen_ax_reg)
-				/* r10 = skb->data, mov %r10, off32(%rbx) */
-				EMIT3_off32(0x4c, 0x8b, 0x93,
-					    offsetof(struct sk_buff, data));
-			EMIT1_off32(0xE8, jmp_offset); /* call */
-			break;
-
-		case BPF_LD | BPF_IND | BPF_H:
-			func = sk_load_half;
-			goto common_load;
-		case BPF_LD | BPF_ABS | BPF_H:
-			func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
-			goto common_load;
-		case BPF_LD | BPF_IND | BPF_B:
-			func = sk_load_byte;
-			goto common_load;
-		case BPF_LD | BPF_ABS | BPF_B:
-			func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
-			goto common_load;
-
 		case BPF_JMP | BPF_EXIT:
 			if (seen_exit) {
 				jmp_offset = ctx->cleanup_addr - addrs[i];