bpf_jit_comp64.c
/*
 * bpf_jit_comp64.c: eBPF JIT compiler
 *
 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
 *		  IBM Corporation
 *
 * Based on the powerpc classic BPF JIT compiler by Matt Evans
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/kprobes.h>

#include "bpf_jit64.h"

int bpf_jit_enable __read_mostly;
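
/*
 * Fill handler passed to bpf_jit_binary_alloc(): pre-fill the allocated
 * image with trap instructions so a stray jump into unused space faults.
 */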
static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
	int *p = area;

	/* Fill whole space with trap instructions */
	while (p < (int *)((char *)area + size))
		*p++ = BREAKPOINT_INSTRUCTION;
}
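
/*
 * Make the freshly written JIT image visible to instruction fetch:
 * order the stores, then flush the icache over [start, end).
 */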
static inline void bpf_flush_icache(void *start, void *end)
{
	smp_wmb();
	flush_icache_range((unsigned long)start, (unsigned long)end);
}
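
/*
 * Track which mapped PowerPC non-volatile registers the program touches.
 * The bit for PPC register 'r' is recorded as (1 << (31 - r)) in ctx->seen,
 * so the prologue/epilogue only save and restore what is actually used.
 */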
static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
	return (ctx->seen & (1 << (31 - b2p[i])));
}

static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
	ctx->seen |= (1 << (31 - b2p[i]));
}

static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
{
	/*
	 * We only need a stack frame if:
	 * - we call other functions (kernel helpers), or
	 * - the bpf program uses its stack area
	 * The latter condition is deduced from the usage of BPF_REG_FP
	 */
	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
}
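
/*
 * Cache the skb header length and data pointer in the non-volatile
 * SKB_HLEN_REG/SKB_DATA_REG registers so packet loads (sk_load_*) can use
 * them without reloading from the sk_buff on every access.
 */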
static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
{
	/*
	 * Load skb->len and skb->data_len
	 * r3 points to skb
	 */
	PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
	PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
	/* header_len = len - data_len */
	PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);

	/* skb->data pointer */
	PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
}
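
/*
 * Emit an indirect call to the kernel helper at 'func'. On ELF ABIv1,
 * 'func' is a function descriptor, so load the real entry point and the
 * callee's TOC from it; on ABIv2, just branch via r12.
 */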
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
#ifdef PPC64_ELF_ABI_v1
	/* func points to the function descriptor */
	PPC_LI64(b2p[TMP_REG_2], func);
	/* Load actual entry point from function descriptor */
	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
	/* ... and move it to LR */
	PPC_MTLR(b2p[TMP_REG_1]);
	/*
	 * Load TOC from function descriptor at offset 8.
	 * We can clobber r2 since we get called through a
	 * function pointer (so the caller will save/restore r2)
	 * and since we don't use a TOC ourselves.
	 */
	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
	/* We can clobber r12 */
	PPC_FUNC_ADDR(12, func);
	PPC_MTLR(12);
#endif
	PPC_BLRL();
}
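
/*
 * Emit the function prologue: create a stack frame and save LR if we call
 * helpers, back up only the non-volatile registers that are actually used,
 * set up the skb cache if needed, and point BPF_REG_FP at the BPF stack area.
 */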
static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
	int i;
	bool new_stack_frame = bpf_has_stack_frame(ctx);

	if (new_stack_frame) {
		/*
		 * We need a stack frame, but we don't necessarily need to
		 * save/restore LR unless we call other functions
		 */
		if (ctx->seen & SEEN_FUNC) {
			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
		}

		PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME);
	}

	/*
	 * Back up non-volatile regs -- BPF registers 6-10
	 * If we haven't created our own stack frame, we save these
	 * in the protected zone below the previous stack frame
	 */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_STL(b2p[i], 1,
				(new_stack_frame ? BPF_PPC_STACKFRAME : 0) -
					(8 * (32 - b2p[i])));

	/*
	 * Save additional non-volatile regs if we cache skb
	 * Also, setup skb data
	 */
	if (ctx->seen & SEEN_SKB) {
		PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
			BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG])));
		PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
			BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG])));
		bpf_jit_emit_skb_loads(image, ctx);
	}

	/* Setup frame pointer to point to the bpf stack area */
	if (bpf_is_seen_register(ctx, BPF_REG_FP))
		PPC_ADDI(b2p[BPF_REG_FP], 1,
				BPF_PPC_STACKFRAME - BPF_PPC_STACK_SAVE);
}
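
/*
 * Emit the function epilogue: move the BPF return value into r3, restore
 * the saved non-volatile registers, tear down the stack frame (restoring
 * LR if needed) and return to the caller.
 */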
static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
	int i;
	bool new_stack_frame = bpf_has_stack_frame(ctx);

	/* Move result to r3 */
	PPC_MR(3, b2p[BPF_REG_0]);

	/* Restore NVRs */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, i))
			PPC_BPF_LL(b2p[i], 1,
				(new_stack_frame ? BPF_PPC_STACKFRAME : 0) -
					(8 * (32 - b2p[i])));

	/* Restore non-volatile registers used for skb cache */
	if (ctx->seen & SEEN_SKB) {
		PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
			BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG])));
		PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
			BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG])));
	}

	/* Tear down our stack frame */
	if (new_stack_frame) {
		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
		if (ctx->seen & SEEN_FUNC) {
			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
			PPC_MTLR(0);
		}
	}

	PPC_BLR();
}

/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
			      struct codegen_context *ctx,
			      u32 *addrs)
{
	const struct bpf_insn *insn = fp->insnsi;
	int flen = fp->len;
	int i;

	/* Start of epilogue code - will only be valid 2nd pass onwards */
	u32 exit_addr = addrs[flen];

	for (i = 0; i < flen; i++) {
		u32 code = insn[i].code;
		u32 dst_reg = b2p[insn[i].dst_reg];
		u32 src_reg = b2p[insn[i].src_reg];
		s16 off = insn[i].off;
		s32 imm = insn[i].imm;
		u64 imm64;
		u8 *func;
		u32 true_cond;
		int stack_local_off;

		/*
		 * addrs[] maps a BPF bytecode address into a real offset from
		 * the start of the body code.
		 */
		addrs[i] = ctx->idx * 4;

		/*
		 * As an optimization, we note down which non-volatile registers
		 * are used so that we can only save/restore those in our
		 * prologue and epilogue. We do this here regardless of whether
		 * the actual BPF instruction uses src/dst registers or not
		 * (for instance, BPF_CALL does not use them). The expectation
		 * is that those instructions will have src_reg/dst_reg set to
		 * 0. Even otherwise, we just lose some prologue/epilogue
		 * optimization but everything else should work without
		 * any issues.
		 */
		if (dst_reg >= 24 && dst_reg <= 31)
			bpf_set_seen_register(ctx, insn[i].dst_reg);
		if (src_reg >= 24 && src_reg <= 31)
			bpf_set_seen_register(ctx, insn[i].src_reg);

		switch (code) {
		/*
		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
		 */
		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
			PPC_ADD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
			PPC_SUB(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
			if (BPF_OP(code) == BPF_SUB)
				imm = -imm;
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
				}
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
			if (BPF_CLASS(code) == BPF_ALU)
				PPC_MULW(dst_reg, dst_reg, src_reg);
			else
				PPC_MULD(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
			if (imm >= -32768 && imm < 32768)
				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
			else {
				PPC_LI32(b2p[TMP_REG_1], imm);
				if (BPF_CLASS(code) == BPF_ALU)
					PPC_MULW(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				else
					PPC_MULD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
			PPC_CMPWI(src_reg, 0);
			PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_JMP(exit_addr);
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULW(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVWU(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
			PPC_CMPDI(src_reg, 0);
			PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_JMP(exit_addr);
			if (BPF_OP(code) == BPF_MOD) {
				PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
				PPC_MULD(b2p[TMP_REG_1], src_reg,
						b2p[TMP_REG_1]);
				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else
				PPC_DIVD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
			if (imm == 0)
				return -EINVAL;
			else if (imm == 1)
				goto bpf_alu32_trunc;

			PPC_LI32(b2p[TMP_REG_1], imm);
			switch (BPF_CLASS(code)) {
			case BPF_ALU:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULW(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVWU(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			case BPF_ALU64:
				if (BPF_OP(code) == BPF_MOD) {
					PPC_DIVD(b2p[TMP_REG_2], dst_reg,
							b2p[TMP_REG_1]);
					PPC_MULD(b2p[TMP_REG_1],
							b2p[TMP_REG_1],
							b2p[TMP_REG_2]);
					PPC_SUB(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				} else
					PPC_DIVD(dst_reg, dst_reg,
							b2p[TMP_REG_1]);
				break;
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
			PPC_NEG(dst_reg, dst_reg);
			goto bpf_alu32_trunc;

		/*
		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
		 */
		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
			PPC_AND(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
			if (!IMM_H(imm))
				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
			else {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
			PPC_OR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
		case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
			PPC_XOR(dst_reg, dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
				/* Sign-extended */
				PPC_LI32(b2p[TMP_REG_1], imm);
				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
			} else {
				if (IMM_L(imm))
					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
				if (IMM_H(imm))
					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
			}
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
			/* slw clears top 32 bits */
			PPC_SLW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src */
			PPC_SLD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
			/* with imm 0, we still need to clear top 32 bits */
			PPC_SLWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
			if (imm != 0)
				PPC_SLDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
			PPC_SRW(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
			PPC_SRD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
			PPC_SRWI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
			if (imm != 0)
				PPC_SRDI(dst_reg, dst_reg, imm);
			break;
		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
			PPC_SRAD(dst_reg, dst_reg, src_reg);
			break;
		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
			if (imm != 0)
				PPC_SRADI(dst_reg, dst_reg, imm);
			break;

		/*
		 * MOV
		 */
		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
			PPC_MR(dst_reg, src_reg);
			goto bpf_alu32_trunc;
		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
			PPC_LI32(dst_reg, imm);
			if (imm < 0)
				goto bpf_alu32_trunc;
			break;

bpf_alu32_trunc:
			/* Truncate to 32-bits */
			if (BPF_CLASS(code) == BPF_ALU)
				PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
			break;

		/*
		 * BPF_FROM_BE/LE
		 */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
		case BPF_ALU | BPF_END | BPF_FROM_BE:
#ifdef __BIG_ENDIAN__
			if (BPF_SRC(code) == BPF_FROM_BE)
				goto emit_clear;
#else /* !__BIG_ENDIAN__ */
			if (BPF_SRC(code) == BPF_FROM_LE)
				goto emit_clear;
#endif
			switch (imm) {
			case 16:
				/* Rotate 8 bits left & mask with 0x0000ff00 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
				/* Rotate 8 bits right & insert LSB to reg */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
				/* Move result back to dst_reg */
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 32:
				/*
				 * Rotate word left by 8 bits:
				 * 2 bytes are already in their final position
				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
				 */
				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
				/* Rotate 24 bits and insert byte 1 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
				/* Rotate 24 bits and insert byte 3 */
				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
				PPC_MR(dst_reg, b2p[TMP_REG_1]);
				break;
			case 64:
				/*
				 * Way easier and faster(?) to store the value
				 * into stack and then use ldbrx
				 *
				 * First, determine where in stack we can store
				 * this:
				 * - if we have allotted a stack frame, then we
				 *   will utilize the area set aside by
				 *   BPF_PPC_STACK_LOCALS
				 * - else, we use the area beneath the NV GPR
				 *   save area
				 *
				 * ctx->seen will be reliable in pass2, but
				 * the instructions generated will remain the
				 * same across all passes
				 */
				if (bpf_has_stack_frame(ctx))
					stack_local_off = STACK_FRAME_MIN_SIZE;
				else
					stack_local_off = -(BPF_PPC_STACK_SAVE + 8);

				PPC_STD(dst_reg, 1, stack_local_off);
				PPC_ADDI(b2p[TMP_REG_1], 1, stack_local_off);
				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
				break;
			}
			break;

emit_clear:
			switch (imm) {
			case 16:
				/* zero-extend 16 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
				break;
			case 32:
				/* zero-extend 32 bits into 64 bits */
				PPC_RLDICL(dst_reg, dst_reg, 0, 32);
				break;
			case 64:
				/* nop */
				break;
			}
			break;

		/*
		 * BPF_ST(X)
		 */
		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STB(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STH(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STW(src_reg, dst_reg, off);
			break;
		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
			if (BPF_CLASS(code) == BPF_ST) {
				PPC_LI32(b2p[TMP_REG_1], imm);
				src_reg = b2p[TMP_REG_1];
			}
			PPC_STD(src_reg, dst_reg, off);
			break;

		/*
		 * BPF_STX XADD (atomic_add)
		 */
		/* *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
			/* Get EA into TMP_REG_1 */
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			/* error if EA is not word-aligned */
			PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03);
			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12);
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_JMP(exit_addr);
			/* load value from memory into TMP_REG_2 */
			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			/* add value from src_reg into this */
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			/* store result back */
			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			/* we're done if this succeeded */
			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4));
			/* otherwise, let's try once more */
			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			/* exit if the store was not successful */
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_BCC(COND_NE, exit_addr);
			break;
		/* *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
			/* error if EA is not doubleword-aligned */
			PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07);
			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4));
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_JMP(exit_addr);
			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4));
			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
			PPC_LI(b2p[BPF_REG_0], 0);
			PPC_BCC(COND_NE, exit_addr);
			break;

		/*
		 * BPF_LDX
		 */
		/* dst = *(u8 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
			PPC_LBZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u16 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_H:
			PPC_LHZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u32 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_W:
			PPC_LWZ(dst_reg, src_reg, off);
			break;
		/* dst = *(u64 *)(ul) (src + off) */
		case BPF_LDX | BPF_MEM | BPF_DW:
			PPC_LD(dst_reg, src_reg, off);
			break;

		/*
		 * Doubleword load
		 * 16 byte instruction that uses two 'struct bpf_insn'
		 */
		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
			imm64 = ((u64)(u32) insn[i].imm) |
				    (((u64)(u32) insn[i+1].imm) << 32);
			/* Adjust for two bpf instructions */
			addrs[++i] = ctx->idx * 4;
			PPC_LI64(dst_reg, imm64);
			break;

		/*
		 * Return/Exit
		 */
		case BPF_JMP | BPF_EXIT:
			/*
			 * If this isn't the very last instruction, branch to
			 * the epilogue. If we _are_ the last instruction,
			 * we'll just fall through to the epilogue.
			 */
			if (i != flen - 1)
				PPC_JMP(exit_addr);
			/* else fall through to the epilogue */
			break;

		/*
		 * Call kernel helper
		 */
		case BPF_JMP | BPF_CALL:
			ctx->seen |= SEEN_FUNC;
			func = (u8 *) __bpf_call_base + imm;

			/* Save skb pointer if we need to re-cache skb data */
			if (bpf_helper_changes_skb_data(func))
				PPC_BPF_STL(3, 1, STACK_FRAME_MIN_SIZE);

			bpf_jit_emit_func_call(image, ctx, (u64)func);

			/* move return value from r3 to BPF_REG_0 */
			PPC_MR(b2p[BPF_REG_0], 3);

			/* refresh skb cache */
			if (bpf_helper_changes_skb_data(func)) {
				/* reload skb pointer to r3 */
				PPC_BPF_LL(3, 1, STACK_FRAME_MIN_SIZE);
				bpf_jit_emit_skb_loads(image, ctx);
			}
			break;

		/*
		 * Jumps and branches
		 */
		case BPF_JMP | BPF_JA:
			PPC_JMP(addrs[i + 1 + off]);
			break;

		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSGT | BPF_X:
			true_cond = COND_GT;
			goto cond_branch;
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_X:
			true_cond = COND_GE;
			goto cond_branch;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
			true_cond = COND_EQ;
			goto cond_branch;
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_X:
			true_cond = COND_NE;
			goto cond_branch;
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			true_cond = COND_NE;
			/* Fall through */

cond_branch:
			switch (code) {
			case BPF_JMP | BPF_JGT | BPF_X:
			case BPF_JMP | BPF_JGE | BPF_X:
			case BPF_JMP | BPF_JEQ | BPF_X:
			case BPF_JMP | BPF_JNE | BPF_X:
				/* unsigned comparison */
				PPC_CMPLD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSGT | BPF_X:
			case BPF_JMP | BPF_JSGE | BPF_X:
				/* signed comparison */
				PPC_CMPD(dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JSET | BPF_X:
				PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
				break;
			case BPF_JMP | BPF_JNE | BPF_K:
			case BPF_JMP | BPF_JEQ | BPF_K:
			case BPF_JMP | BPF_JGT | BPF_K:
			case BPF_JMP | BPF_JGE | BPF_K:
				/*
				 * Need sign-extended load, so only positive
				 * values can be used as imm in cmpldi
				 */
				if (imm >= 0 && imm < 32768)
					PPC_CMPLDI(dst_reg, imm);
				else {
					/* sign-extending load */
					PPC_LI32(b2p[TMP_REG_1], imm);
					/* ... but unsigned comparison */
					PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
				}
				break;
			case BPF_JMP | BPF_JSGT | BPF_K:
			case BPF_JMP | BPF_JSGE | BPF_K:
				/*
				 * signed comparison, so any 16-bit value
				 * can be used in cmpdi
				 */
				if (imm >= -32768 && imm < 32768)
					PPC_CMPDI(dst_reg, imm);
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
				}
				break;
			case BPF_JMP | BPF_JSET | BPF_K:
				/* andi does not sign-extend the immediate */
				if (imm >= 0 && imm < 32768)
					/* PPC_ANDI is _only/always_ dot-form */
					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
						    b2p[TMP_REG_1]);
				}
				break;
			}
			PPC_BCC(true_cond, addrs[i + 1 + off]);
			break;

		/*
		 * Loads from packet header/data
		 * Assume 32-bit input value in imm and X (src_reg)
		 */

		/* Absolute loads */
		case BPF_LD | BPF_W | BPF_ABS:
			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
			goto common_load_abs;
		case BPF_LD | BPF_H | BPF_ABS:
			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
			goto common_load_abs;
		case BPF_LD | BPF_B | BPF_ABS:
			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
common_load_abs:
			/*
			 * Load from [imm]
			 * Load into r4, which can just be passed onto
			 * skb load helpers as the second parameter
			 */
			PPC_LI32(4, imm);
			goto common_load;

		/* Indirect loads */
		case BPF_LD | BPF_W | BPF_IND:
			func = (u8 *)sk_load_word;
			goto common_load_ind;
		case BPF_LD | BPF_H | BPF_IND:
			func = (u8 *)sk_load_half;
			goto common_load_ind;
		case BPF_LD | BPF_B | BPF_IND:
			func = (u8 *)sk_load_byte;
common_load_ind:
			/*
			 * Load from [src_reg + imm]
			 * Treat src_reg as a 32-bit value
			 */
			PPC_EXTSW(4, src_reg);
			if (imm) {
				if (imm >= -32768 && imm < 32768)
					PPC_ADDI(4, 4, IMM_L(imm));
				else {
					PPC_LI32(b2p[TMP_REG_1], imm);
					PPC_ADD(4, 4, b2p[TMP_REG_1]);
				}
			}

common_load:
			ctx->seen |= SEEN_SKB;
			ctx->seen |= SEEN_FUNC;
			bpf_jit_emit_func_call(image, ctx, (u64)func);

			/*
			 * Helper returns 'lt' condition on error, and an
			 * appropriate return value in BPF_REG_0
			 */
			PPC_BCC(COND_LT, exit_addr);
			break;

		/*
		 * TODO: Tail call
		 */
		case BPF_JMP | BPF_CALL | BPF_X:

		default:
			/*
			 * The filter contains something cruel & unusual.
			 * We don't handle it, but also there shouldn't be
			 * anything missing from our list.
			 */
			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
					code, i);
			return -ENOTSUPP;
		}
	}

	/* Set end-of-body-code address for exit. */
	addrs[i] = ctx->idx * 4;

	return 0;
}
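
/*
 * Classic BPF entry point: kept as an empty stub here so classic filters
 * are migrated to eBPF by the core and then JITed via bpf_int_jit_compile().
 */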
void bpf_jit_compile(struct bpf_prog *fp) { }
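
/*
 * Main eBPF JIT entry point: run a sizing pass to compute offsets and the
 * registers/features used, allocate the image, then generate the prologue,
 * body and epilogue over two real passes so branch targets resolve.
 */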
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 proglen;
	u32 alloclen;
	u8 *image = NULL;
	u32 *code_base;
	u32 *addrs;
	struct codegen_context cgctx;
	int pass;
	int flen;
	struct bpf_binary_header *bpf_hdr;

	if (!bpf_jit_enable)
		return fp;

	flen = fp->len;
	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
	if (addrs == NULL)
		return fp;

	cgctx.idx = 0;
	cgctx.seen = 0;
	/* Scouting faux-generate pass 0 */
	if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
		/* We hit something illegal or unsupported. */
		goto out;

	/*
	 * Pretend to build prologue, given the features we've seen.  This will
	 * update cgctx.idx as it pretends to output instructions, then we can
	 * calculate total size from idx.
	 */
	bpf_jit_build_prologue(0, &cgctx);
	bpf_jit_build_epilogue(0, &cgctx);

	proglen = cgctx.idx * 4;
	alloclen = proglen + FUNCTION_DESCR_SIZE;

	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
			bpf_jit_fill_ill_insns);
	if (!bpf_hdr)
		goto out;

	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);

	/* Code generation passes 1-2 */
	for (pass = 1; pass < 3; pass++) {
		/* Now build the prologue, body code & epilogue for real. */
		cgctx.idx = 0;
		bpf_jit_build_prologue(code_base, &cgctx);
		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
		bpf_jit_build_epilogue(code_base, &cgctx);

		if (bpf_jit_enable > 1)
			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
				proglen - (cgctx.idx * 4), cgctx.seen);
	}

	if (bpf_jit_enable > 1)
		/*
		 * Note that we output the base address of the code_base
		 * rather than image, since opcodes are in code_base.
		 */
		bpf_jit_dump(flen, proglen, pass, code_base);

	if (image) {
		bpf_flush_icache(bpf_hdr, image + alloclen);
#ifdef PPC64_ELF_ABI_v1
		/* Function descriptor nastiness: Address + TOC */
		((u64 *)image)[0] = (u64)code_base;
		((u64 *)image)[1] = local_paca->kernel_toc;
#endif
		fp->bpf_func = (void *)image;
		fp->jited = 1;
	}

out:
	kfree(addrs);
	return fp;
}
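
/*
 * Free a JITed program: the executable image was allocated with
 * bpf_jit_binary_alloc(), so recover its header from the page-aligned
 * address of bpf_func and release it before freeing the bpf_prog itself.
 */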
void bpf_jit_free(struct bpf_prog *fp)
{
	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
	struct bpf_binary_header *bpf_hdr = (void *)addr;

	if (fp->jited)
		bpf_jit_binary_free(bpf_hdr);

	bpf_prog_unlock_free(fp);
}