@@ -103,23 +103,18 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
 /* --- Emitters --- */
 static void
 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
-	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
+	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
+	   bool indir)
 {
-	enum cmd_ctx_swap ctx;
 	u64 insn;
 
-	if (sync)
-		ctx = CMD_CTX_SWAP;
-	else
-		ctx = CMD_CTX_NO_SWAP;
-
 	insn = FIELD_PREP(OP_CMD_A_SRC, areg) |
 		FIELD_PREP(OP_CMD_CTX, ctx) |
 		FIELD_PREP(OP_CMD_B_SRC, breg) |
 		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
 		FIELD_PREP(OP_CMD_XFER, xfer) |
 		FIELD_PREP(OP_CMD_CNT, size) |
-		FIELD_PREP(OP_CMD_SIG, sync) |
+		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
 		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
 		FIELD_PREP(OP_CMD_INDIR, indir) |
 		FIELD_PREP(OP_CMD_MODE, mode);
@@ -129,7 +124,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
 
 static void
 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	     swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
+	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
 {
 	struct nfp_insn_re_regs reg;
 	int err;
@@ -150,22 +145,22 @@ emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
 		return;
 	}
 
-	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
+	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
 		   indir);
 }
 
 static void
 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	 swreg lreg, swreg rreg, u8 size, bool sync)
+	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
 {
-	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
+	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
 }
 
 static void
 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
-	       swreg lreg, swreg rreg, u8 size, bool sync)
+	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
 {
-	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
+	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
 }
 
 static void
@@ -410,7 +405,7 @@ __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
 		FIELD_PREP(OP_LCSR_A_SRC, areg) |
 		FIELD_PREP(OP_LCSR_B_SRC, breg) |
 		FIELD_PREP(OP_LCSR_WRITE, wr) |
-		FIELD_PREP(OP_LCSR_ADDR, addr) |
+		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
 		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
 		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
 
@@ -438,10 +433,16 @@ static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
 		return;
 	}
 
-	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
+	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
 		    false, reg.src_lmextn);
 }
 
+/* CSR value is read in following immed[gpr, 0] */
+static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
+{
+	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
+}
+
 static void emit_nop(struct nfp_prog *nfp_prog)
 {
 	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
@@ -553,6 +554,19 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
 	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
 }
 
+/* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src and
+ * OR the result into @dst at @offset; the other bits of @dst are unchanged.
+ */
+static void
+wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
+		   u8 field_len, u8 offset)
+{
+	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
+	u8 mask = ((1 << field_len) - 1) << offset;
+
+	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
+}
+
 static void
 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
 	      swreg *rega, swreg *regb)
@@ -597,7 +611,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	/* Memory read from source addr into transfer-in registers. */
 	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
 		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
-		     src_base, off, xfer_num - 1, true, len > 32);
+		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
 
 	/* Move from transfer-in to transfer-out. */
 	for (i = 0; i < xfer_num; i++)
@@ -609,39 +623,39 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 		/* Use single direct_ref write8. */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
-			 true);
+			 CMD_CTX_SWAP);
 	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
 		/* Use single direct_ref write32. */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
-			 true);
+			 CMD_CTX_SWAP);
 	} else if (len <= 32) {
 		/* Use single indirect_ref write8. */
 		wrp_immed(nfp_prog, reg_none(),
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       len - 1, true);
+			       len - 1, CMD_CTX_SWAP);
 	} else if (IS_ALIGNED(len, 4)) {
 		/* Use single indirect_ref write32. */
 		wrp_immed(nfp_prog, reg_none(),
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       xfer_num - 1, true);
+			       xfer_num - 1, CMD_CTX_SWAP);
 	} else if (len <= 40) {
 		/* Use one direct_ref write32 to write the first 32-bytes, then
 		 * another direct_ref write8 to write the remaining bytes.
 		 */
 		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
-			 true);
+			 CMD_CTX_SWAP);
 
 		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
 				      imm_b(nfp_prog));
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
 			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
-			 true);
+			 CMD_CTX_SWAP);
 	} else {
 		/* Use one indirect_ref write32 to write 4-bytes aligned length,
 		 * then another direct_ref write8 to write the remaining bytes.
@@ -652,12 +666,12 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
 		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
 			       reg_a(meta->paired_st->dst_reg * 2), off,
-			       xfer_num - 2, true);
+			       xfer_num - 2, CMD_CTX_SWAP);
 		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
 		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
 		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
 			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
-			 (len & 0x3) - 1, true);
+			 (len & 0x3) - 1, CMD_CTX_SWAP);
 	}
 
 	/* TODO: The following extra load is to make sure data flow be identical
@@ -718,7 +732,7 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
 	shift = size < 4 ? 4 - size : 0;
 
 	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
-		 pptr_reg(nfp_prog), offset, sz - 1, true);
+		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
 
 	i = 0;
 	if (shift)
@@ -748,7 +762,7 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
 	mask = size < 4 ? GENMASK(size - 1, 0) : 0;
 
 	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
-		 lreg, rreg, sz / 4 - 1, true);
+		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
 
 	i = 0;
 	if (mask)
@@ -828,7 +842,7 @@ data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
 		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
 
 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
-		 reg_a(dst_gpr), offset, size - 1, true);
+		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
 
 	return 0;
 }
@@ -842,7 +856,7 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
 		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
 
 	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
-		 reg_a(dst_gpr), offset, size - 1, true);
+		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
 
 	return 0;
 }
@@ -1339,7 +1353,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 }
 
 static int
-map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	struct bpf_offloaded_map *offmap;
 	struct nfp_bpf_map *nfp_map;
@@ -1353,19 +1367,21 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 	/* We only have to reload LM0 if the key is not at start of stack */
 	lm_off = nfp_prog->stack_depth;
-	lm_off += meta->arg2.var_off.value + meta->arg2.off;
-	load_lm_ptr = meta->arg2_var_off || lm_off;
+	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
+	load_lm_ptr = meta->arg2.var_off || lm_off;
 
 	/* Set LM0 to start of key */
 	if (load_lm_ptr)
 		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
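+	/* Map updates also pass a value pointer (R3) - point LM2 at it */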
+	if (meta->func_id == BPF_FUNC_map_update_elem)
+		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
 
 	/* Load map ID into a register, it should actually fit as an immediate
 	 * but in case it doesn't deal with it here, not in the delay slots.
 	 */
 	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
 
-	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
+	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
 		     2, RELO_BR_HELPER);
 	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
 
@@ -1388,6 +1404,18 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int
+nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
+	/* CSR value is read in following immed[gpr, 0] */
+	emit_immed(nfp_prog, reg_both(0), 0,
+		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+	emit_immed(nfp_prog, reg_both(1), 0,
+		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+	return 0;
+}
+
 /* --- Callbacks --- */
 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
@@ -1838,6 +1866,128 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 				 tmp_reg, meta->insn.dst_reg * 2, size);
 }
 
+static void
+mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta)
+{
+	s16 range_start = meta->pkt_cache.range_start;
+	s16 range_end = meta->pkt_cache.range_end;
+	swreg src_base, off;
+	u8 xfer_num, len;
+	bool indir;
+
+	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
+	src_base = reg_a(meta->insn.src_reg * 2);
+	len = range_end - range_start;
+	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
+
+	indir = len > 8 * REG_WIDTH;
+	/* Setup PREV_ALU for indirect mode. */
+	if (indir)
+		wrp_immed(nfp_prog, reg_none(),
+			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+
+	/* Cache memory into transfer-in registers. */
+	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
+		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
+}
+
+static int
+mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
+				     struct nfp_insn_meta *meta,
+				     unsigned int size)
+{
+	s16 range_start = meta->pkt_cache.range_start;
+	s16 insn_off = meta->insn.off - range_start;
+	swreg dst_lo, dst_hi, src_lo, src_mid;
+	u8 dst_gpr = meta->insn.dst_reg * 2;
+	u8 len_lo = size, len_mid = 0;
+	u8 idx = insn_off / REG_WIDTH;
+	u8 off = insn_off % REG_WIDTH;
+
+	dst_hi = reg_both(dst_gpr + 1);
+	dst_lo = reg_both(dst_gpr);
+	src_lo = reg_xfer(idx);
+
+	/* The read length could involve as many as three registers. */
+	if (size > REG_WIDTH - off) {
+		/* Calculate the part in the second register. */
+		len_lo = REG_WIDTH - off;
+		len_mid = size - len_lo;
+
+		/* Calculate the part in the third register. */
+		if (size > 2 * REG_WIDTH - off)
+			len_mid = REG_WIDTH;
+	}
+
+	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
+
+	if (!len_mid) {
+		wrp_immed(nfp_prog, dst_hi, 0);
+		return 0;
+	}
+
+	src_mid = reg_xfer(idx + 1);
+
+	if (size <= REG_WIDTH) {
+		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else {
+		swreg src_hi = reg_xfer(idx + 2);
+
+		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
+				   REG_WIDTH - len_lo, len_lo);
+		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
+				REG_WIDTH - len_lo);
+		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
+				   len_lo);
+	}
+
+	return 0;
+}
+
+static int
+mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
+				   struct nfp_insn_meta *meta,
+				   unsigned int size)
+{
+	swreg dst_lo, dst_hi, src_lo;
+	u8 dst_gpr, idx;
+
+	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
+	dst_gpr = meta->insn.dst_reg * 2;
+	dst_hi = reg_both(dst_gpr + 1);
+	dst_lo = reg_both(dst_gpr);
+	src_lo = reg_xfer(idx);
+
+	if (size < REG_WIDTH) {
+		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else if (size == REG_WIDTH) {
+		wrp_mov(nfp_prog, dst_lo, src_lo);
+		wrp_immed(nfp_prog, dst_hi, 0);
+	} else {
+		swreg src_hi = reg_xfer(idx + 1);
+
+		wrp_mov(nfp_prog, dst_lo, src_lo);
+		wrp_mov(nfp_prog, dst_hi, src_hi);
+	}
+
+	return 0;
+}
+
+static int
+mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
+			   struct nfp_insn_meta *meta, unsigned int size)
+{
+	u8 off = meta->insn.off - meta->pkt_cache.range_start;
+
+	if (IS_ALIGNED(off, REG_WIDTH))
+		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
+
+	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
+}
+
 static int
 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 	unsigned int size)
@@ -1852,8 +2002,16 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 			return mem_ldx_skb(nfp_prog, meta, size);
 	}
 
-	if (meta->ptr.type == PTR_TO_PACKET)
-		return mem_ldx_data(nfp_prog, meta, size);
+	if (meta->ptr.type == PTR_TO_PACKET) {
+		if (meta->pkt_cache.range_end) {
+			if (meta->pkt_cache.do_init)
+				mem_ldx_data_init_pktcache(nfp_prog, meta);
+
+			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
+		} else {
+			return mem_ldx_data(nfp_prog, meta, size);
+		}
+	}
 
 	if (meta->ptr.type == PTR_TO_STACK)
 		return mem_ldx_stack(nfp_prog, meta, size,
@@ -1982,6 +2140,111 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return mem_stx(nfp_prog, meta, 8);
 }
 
+static int
+mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
+{
+	u8 dst_gpr = meta->insn.dst_reg * 2;
+	u8 src_gpr = meta->insn.src_reg * 2;
+	unsigned int full_add, out;
+	swreg addra, addrb, off;
+
+	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+	/* We can fit 16 bits into the command immediate. If we know the
+	 * immediate is guaranteed to either always or never fit into 16 bits,
+	 * we only generate code to handle that particular case; otherwise we
+	 * generate code for both.
+	 */
+	out = nfp_prog_current_offset(nfp_prog);
+	full_add = nfp_prog_current_offset(nfp_prog);
+
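+	/* Each block below bumps the pre-computed targets by the number of
+	 * instructions the corresponding path will emit: @full_add is where
+	 * the register-based add starts, @out is past the whole sequence.
+	 */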
+	if (meta->insn.off) {
+		out += 2;
+		full_add += 2;
+	}
+	if (meta->xadd_maybe_16bit) {
+		out += 3;
+		full_add += 3;
+	}
+	if (meta->xadd_over_16bit)
+		out += 2 + is64;
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		out += 5;
+		full_add += 5;
+	}
+
+	/* Generate the branch for choosing add_imm vs add */
+	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
+		swreg max_imm = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, max_imm, 0xffff);
+		emit_alu(nfp_prog, reg_none(),
+			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
+		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
+		/* defer for add */
+	}
+
+	/* If insn has an offset add to the address */
+	if (!meta->insn.off) {
+		addra = reg_a(dst_gpr);
+		addrb = reg_b(dst_gpr + 1);
+	} else {
+		emit_alu(nfp_prog, imma_a(nfp_prog),
+			 reg_a(dst_gpr), ALU_OP_ADD, off);
+		emit_alu(nfp_prog, imma_b(nfp_prog),
+			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
+		addra = imma_a(nfp_prog);
+		addrb = imma_b(nfp_prog);
+	}
+
+	/* Generate the add_imm if 16 bits are possible */
+	if (meta->xadd_maybe_16bit) {
+		swreg prev_alu = imm_a(nfp_prog);
+
+		wrp_immed(nfp_prog, prev_alu,
+			  FIELD_PREP(CMD_OVE_DATA, 2) |
+			  CMD_OVE_LEN |
+			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
+		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
+		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
+			       addra, addrb, 0, CMD_CTX_NO_SWAP);
+
+		if (meta->xadd_over_16bit)
+			emit_br(nfp_prog, BR_UNC, out, 0);
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
+		return -EINVAL;
+
+	/* Generate the add if 16 bits are not guaranteed */
+	if (meta->xadd_over_16bit) {
+		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
+			 addra, addrb, is64 << 2,
+			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
+
+		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
+		if (is64)
+			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
+	}
+
+	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_xadd(nfp_prog, meta, false);
+}
+
+static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return mem_xadd(nfp_prog, meta, true);
+}
+
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
@@ -2183,7 +2446,11 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	case BPF_FUNC_xdp_adjust_head:
 		return adjust_head(nfp_prog, meta);
 	case BPF_FUNC_map_lookup_elem:
-		return map_lookup_stack(nfp_prog, meta);
+	case BPF_FUNC_map_update_elem:
+	case BPF_FUNC_map_delete_elem:
+		return map_call_stack_common(nfp_prog, meta);
+	case BPF_FUNC_get_prandom_u32:
+		return nfp_get_prandom_u32(nfp_prog, meta);
 	default:
 		WARN_ONCE(1, "verifier allowed unsupported function\n");
 		return -EOPNOTSUPP;
@@ -2243,6 +2510,8 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
 	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
 	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
+	[BPF_STX | BPF_XADD | BPF_W] =	mem_xadd4,
+	[BPF_STX | BPF_XADD | BPF_DW] =	mem_xadd8,
 	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
 	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
 	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
@@ -2821,6 +3090,120 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
 	}
 }
 
+static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *range_node = NULL;
+	s16 range_start = 0, range_end = 0;
+	bool cache_avail = false;
+	struct bpf_insn *insn;
+	s32 range_ptr_off = 0;
+	u32 range_ptr_id = 0;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
+			cache_avail = false;
+
+		if (meta->skip)
+			continue;
+
+		insn = &meta->insn;
+
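+		/* Writes to the packet, helper calls and classic load/stores
+		 * invalidate any read cache candidate built up so far.
+		 */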
+		if (is_mbpf_store_pkt(meta) ||
+		    insn->code == (BPF_JMP | BPF_CALL) ||
+		    is_mbpf_classic_store_pkt(meta) ||
+		    is_mbpf_classic_load(meta)) {
+			cache_avail = false;
+			continue;
+		}
+
+		if (!is_mbpf_load(meta))
+			continue;
+
+		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
+			cache_avail = false;
+			continue;
+		}
+
+		if (!cache_avail) {
+			cache_avail = true;
+			if (range_node)
+				goto end_current_then_start_new;
+			goto start_new;
+		}
+
+		/* Check ID to make sure the two reads share the same variable
+		 * offset against PTR_TO_PACKET, and check OFF to make sure
+		 * they also share the same constant offset.
+		 *
+		 * The OFFs don't strictly need to match: they are constant
+		 * offsets against PTR_TO_PACKET, so different OFFs could be
+		 * canonicalized to offsets against the original packet
+		 * pointer. We don't support that yet.
+		 */
+		if (meta->ptr.id == range_ptr_id &&
+		    meta->ptr.off == range_ptr_off) {
+			s16 new_start = range_start;
+			s16 end, off = insn->off;
+			s16 new_end = range_end;
+			bool changed = false;
+
+			if (off < range_start) {
+				new_start = off;
+				changed = true;
+			}
+
+			end = off + BPF_LDST_BYTES(insn);
+			if (end > range_end) {
+				new_end = end;
+				changed = true;
+			}
+
+			if (!changed)
+				continue;
+
+			if (new_end - new_start <= 64) {
+				/* Install new range. */
+				range_start = new_start;
+				range_end = new_end;
+				continue;
+			}
+		}
+
+end_current_then_start_new:
+		range_node->pkt_cache.range_start = range_start;
+		range_node->pkt_cache.range_end = range_end;
+start_new:
+		range_node = meta;
+		range_node->pkt_cache.do_init = true;
+		range_ptr_id = range_node->ptr.id;
+		range_ptr_off = range_node->ptr.off;
+		range_start = insn->off;
+		range_end = insn->off + BPF_LDST_BYTES(insn);
+	}
+
+	if (range_node) {
+		range_node->pkt_cache.range_start = range_start;
+		range_node->pkt_cache.range_end = range_end;
+	}
+
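+	/* Propagate the computed range to every load covered by the cache. */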
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->skip)
+			continue;
+
+		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
+			if (meta->pkt_cache.do_init) {
+				range_start = meta->pkt_cache.range_start;
+				range_end = meta->pkt_cache.range_end;
+			} else {
+				meta->pkt_cache.range_start = range_start;
+				meta->pkt_cache.range_end = range_end;
+			}
+		}
+	}
+}
+
 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
@@ -2828,6 +3211,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
 	nfp_bpf_opt_ldst_gather(nfp_prog);
+	nfp_bpf_opt_pkt_cache(nfp_prog);
 
 	return 0;
 }
@@ -2952,6 +3336,12 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
 		case BPF_FUNC_map_lookup_elem:
 			val = nfp_prog->bpf->helpers.map_lookup;
 			break;
+		case BPF_FUNC_map_update_elem:
+			val = nfp_prog->bpf->helpers.map_update;
+			break;
+		case BPF_FUNC_map_delete_elem:
+			val = nfp_prog->bpf->helpers.map_delete;
+			break;
 		default:
 			pr_err("relocation of unknown helper %d\n",
 			       val);