@@ -2352,12 +2352,269 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
 	}
 }
 
+/* A load/store pair that forms a memory copy should look like this:
+ *
+ *   ld_width R, [addr_src + offset_src]
+ *   st_width [addr_dest + offset_dest], R
+ *
+ * The destination register of the load and the source register of the
+ * store should be the same, and the load and store should use the same
+ * width. If either addr_src or addr_dest is the stack pointer, we don't
+ * do the CPP optimization, as the stack is modelled by registers on NFP.
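+ *
+ * For example, the following pair qualifies (registers and offsets
+ * are illustrative only):
+ *
+ *   r2 = *(u16 *)(r8 + 2)
+ *   *(u16 *)(r9 + 2) = r2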
+ */
+static bool
+curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
+		    struct nfp_insn_meta *st_meta)
+{
+	struct bpf_insn *ld = &ld_meta->insn;
+	struct bpf_insn *st = &st_meta->insn;
+
+	if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
+		return false;
+
+	if (ld_meta->ptr.type != PTR_TO_PACKET)
+		return false;
+
+	if (st_meta->ptr.type != PTR_TO_PACKET)
+		return false;
+
+	if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
+		return false;
+
+	if (ld->dst_reg != st->src_reg)
+		return false;
+
+	/* There is a jump to the store insn of this pair. */
+	if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
+		return false;
+
+	return true;
+}
+
+/* Currently, we only support chaining load/store pairs if:
+ *
+ * - Their address base registers are the same.
+ * - Their address offsets are contiguous, in the same order (see below).
+ * - They operate at the same memory width.
+ * - There is no jump into the middle of them.
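+ *
+ * For example (offsets are illustrative only), with a 2-byte width,
+ * the pair at [r8 + 0]/[r9 + 0] chains with the pair at
+ * [r8 + 2]/[r9 + 2] in ascending order.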
+ */
+static bool
+curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
+			      struct nfp_insn_meta *st_meta,
+			      struct bpf_insn *prev_ld,
+			      struct bpf_insn *prev_st)
+{
+	u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
+	struct bpf_insn *ld = &ld_meta->insn;
+	struct bpf_insn *st = &st_meta->insn;
+	s16 prev_ld_off, prev_st_off;
+
+	/* This pair is the start pair. */
+	if (!prev_ld)
+		return true;
+
+	prev_size = BPF_LDST_BYTES(prev_ld);
+	curr_size = BPF_LDST_BYTES(ld);
+	prev_ld_base = prev_ld->src_reg;
+	prev_st_base = prev_st->dst_reg;
+	prev_ld_dst = prev_ld->dst_reg;
+	prev_ld_off = prev_ld->off;
+	prev_st_off = prev_st->off;
+
+	if (ld->dst_reg != prev_ld_dst)
+		return false;
+
+	if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
+		return false;
+
+	if (curr_size != prev_size)
+		return false;
+
+	/* There is a jump to the head of this pair. */
+	if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
+		return false;
+
+	/* Both in ascending order. */
+	if (prev_ld_off + prev_size == ld->off &&
+	    prev_st_off + prev_size == st->off)
+		return true;
+
+	/* Both in descending order. */
+	if (ld->off + curr_size == prev_ld_off &&
+	    st->off + curr_size == prev_st_off)
+		return true;
+
+	return false;
+}
+
+/* Return TRUE if a cross memory access happens. A cross memory access
+ * means the store area overlaps the load area, so that a later load
+ * might read the value written by an earlier store; in that case we
+ * can't treat the sequence as a memory copy.
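+ *
+ * For example (offsets are illustrative only), copying 2-byte words
+ * from offsets 0, 2, ... to offsets 2, 4, ... of the same packet
+ * crosses: the second load, at offset 2, would read the value just
+ * stored there by the first pair.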
+ */
+static bool
+cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
+		 struct nfp_insn_meta *head_st_meta)
+{
+	s16 head_ld_off, head_st_off, ld_off;
+
+	/* Different pointer types do not overlap. */
+	if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
+		return false;
+
+	/* Both load and store are PTR_TO_PACKET; check the ID info. */
+	if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
+		return true;
+
+	/* Canonicalize the offsets. Turn all of them into offsets against
+	 * the original base register.
+	 */
+	head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
+	head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
+	ld_off = ld->off + head_ld_meta->ptr.off;
+
+	/* Ascending order cross. */
+	if (ld_off > head_ld_off &&
+	    head_ld_off < head_st_off && ld_off >= head_st_off)
+		return true;
+
+	/* Descending order cross. */
+	if (ld_off < head_ld_off &&
+	    head_ld_off > head_st_off && ld_off <= head_st_off)
+		return true;
+
+	return false;
+}
+
+/* This pass tries to identify the following instruction sequence:
+ *
+ *   load R, [regA + offA]
+ *   store [regB + offB], R
+ *   load R, [regA + offA + const_imm_A]
+ *   store [regB + offB + const_imm_A], R
+ *   load R, [regA + offA + 2 * const_imm_A]
+ *   store [regB + offB + 2 * const_imm_A], R
+ *   ...
+ *
+ * Such a sequence is typically generated by the compiler when lowering
+ * a memcpy; the NFP prefers using CPP instructions to accelerate it.
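+ *
+ * When such a chain is recognized below, the head load keeps the
+ * accumulated copy length in ldst_gather_len and a pointer to its
+ * paired store, while every other insn of the chain is flagged to be
+ * skipped during code generation.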
+ */
+static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *head_ld_meta = NULL;
+	struct nfp_insn_meta *head_st_meta = NULL;
+	struct nfp_insn_meta *meta1, *meta2;
+	struct bpf_insn *prev_ld = NULL;
+	struct bpf_insn *prev_st = NULL;
+	u8 count = 0;
+
+	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+		struct bpf_insn *ld = &meta1->insn;
+		struct bpf_insn *st = &meta2->insn;
+
+		/* Reset record status if any of the following is true:
+		 * - The current insn pair is not load/store.
+		 * - The load/store pair doesn't chain with the previous one.
+		 * - The chained load/store pair crossed with the previous pair.
+		 * - The chained load/store pair has a total size of memory
+		 *   copy beyond 128 bytes which is the maximum length a
+		 *   single NFP CPP command can transfer.
+		 */
+		if (!curr_pair_is_memcpy(meta1, meta2) ||
+		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
+						   prev_st) ||
+		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
+						       head_st_meta) ||
+				      head_ld_meta->ldst_gather_len >= 128))) {
+			if (!count)
+				continue;
+
+			if (count > 1) {
+				s16 prev_ld_off = prev_ld->off;
+				s16 prev_st_off = prev_st->off;
+				s16 head_ld_off = head_ld_meta->insn.off;
+
+				if (prev_ld_off < head_ld_off) {
+					head_ld_meta->insn.off = prev_ld_off;
+					head_st_meta->insn.off = prev_st_off;
+					head_ld_meta->ldst_gather_len =
+						-head_ld_meta->ldst_gather_len;
+				}
+
+				head_ld_meta->paired_st = &head_st_meta->insn;
+				head_st_meta->skip = true;
+			} else {
+				head_ld_meta->ldst_gather_len = 0;
+			}
+
+			/* If the chain is ended by a load/store pair then this
+			 * pair could serve as the new head of the next chain.
+			 */
+			if (curr_pair_is_memcpy(meta1, meta2)) {
+				head_ld_meta = meta1;
+				head_st_meta = meta2;
+				head_ld_meta->ldst_gather_len =
+					BPF_LDST_BYTES(ld);
+				meta1 = nfp_meta_next(meta1);
+				meta2 = nfp_meta_next(meta2);
+				prev_ld = ld;
+				prev_st = st;
+				count = 1;
+			} else {
+				head_ld_meta = NULL;
+				head_st_meta = NULL;
+				prev_ld = NULL;
+				prev_st = NULL;
+				count = 0;
+			}
+
+			continue;
+		}
+
+		if (!head_ld_meta) {
+			head_ld_meta = meta1;
+			head_st_meta = meta2;
+		} else {
+			meta1->skip = true;
+			meta2->skip = true;
+		}
+
+		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
+		meta1 = nfp_meta_next(meta1);
+		meta2 = nfp_meta_next(meta2);
+		prev_ld = ld;
+		prev_st = st;
+		count++;
+	}
+}
+
 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
 {
 	nfp_bpf_opt_reg_init(nfp_prog);
 
 	nfp_bpf_opt_ld_mask(nfp_prog);
 	nfp_bpf_opt_ld_shift(nfp_prog);
+	nfp_bpf_opt_ldst_gather(nfp_prog);
 
 	return 0;
 }