bpf_jit_comp32.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
  4. *
  5. * Author: Wang YanQing (udknight@gmail.com)
* The code is based on code and ideas from:
  7. * Eric Dumazet (eric.dumazet@gmail.com)
  8. * and from:
  9. * Shubham Bansal <illusionist.neo@gmail.com>
  10. */
  11. #include <linux/netdevice.h>
  12. #include <linux/filter.h>
  13. #include <linux/if_vlan.h>
  14. #include <asm/cacheflush.h>
  15. #include <asm/set_memory.h>
  16. #include <asm/nospec-branch.h>
  17. #include <linux/bpf.h>
  18. /*
  19. * eBPF prog stack layout:
  20. *
  21. * high
  22. * original ESP => +-----+
  23. * | | callee saved registers
  24. * +-----+
  25. * | ... | eBPF JIT scratch space
  26. * BPF_FP,IA32_EBP => +-----+
  27. * | ... | eBPF prog stack
  28. * +-----+
  29. * |RSVD | JIT scratchpad
  30. * current ESP => +-----+
  31. * | |
  32. * | ... | Function call stack
  33. * | |
  34. * +-----+
  35. * low
  36. *
  37. * The callee saved registers:
  38. *
  39. * high
  40. * original ESP => +------------------+ \
  41. * | ebp | |
  42. * current EBP => +------------------+ } callee saved registers
  43. * | ebx,esi,edi | |
  44. * +------------------+ /
  45. * low
  46. */
  47. static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
  48. {
  49. if (len == 1)
  50. *ptr = bytes;
  51. else if (len == 2)
  52. *(u16 *)ptr = bytes;
  53. else {
  54. *(u32 *)ptr = bytes;
  55. barrier();
  56. }
  57. return ptr + len;
  58. }
  59. #define EMIT(bytes, len) \
  60. do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
  61. #define EMIT1(b1) EMIT(b1, 1)
  62. #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
  63. #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
  64. #define EMIT4(b1, b2, b3, b4) \
  65. EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
  66. #define EMIT1_off32(b1, off) \
  67. do { EMIT1(b1); EMIT(off, 4); } while (0)
  68. #define EMIT2_off32(b1, b2, off) \
  69. do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
  70. #define EMIT3_off32(b1, b2, b3, off) \
  71. do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
  72. #define EMIT4_off32(b1, b2, b3, b4, off) \
  73. do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
  74. #define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
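/*
 * Worked example (illustrative): the EMIT* macros append little-endian
 * bytes to 'prog' and advance the local byte counter 'cnt'.  For
 * instance, EMIT2(0x31, 0xC0) appends the bytes 31 C0, i.e.
 * "xor eax,eax", and EMIT1_off32(0xE9, disp) appends E9 plus a 32-bit
 * displacement, i.e. "jmp rel32".  jmp_label() converts a byte offset
 * recorded earlier ('label') into the displacement of a jump emitted
 * now: label - cnt - jmp_insn_len, that is, the target minus the offset
 * of the instruction following the jump.
 */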
  75. static bool is_imm8(int value)
  76. {
  77. return value <= 127 && value >= -128;
  78. }
  79. static bool is_simm32(s64 value)
  80. {
  81. return value == (s64) (s32) value;
  82. }
  83. #define STACK_OFFSET(k) (k)
  84. #define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */
  85. #define IA32_EAX (0x0)
  86. #define IA32_EBX (0x3)
  87. #define IA32_ECX (0x1)
  88. #define IA32_EDX (0x2)
  89. #define IA32_ESI (0x6)
  90. #define IA32_EDI (0x7)
  91. #define IA32_EBP (0x5)
  92. #define IA32_ESP (0x4)
  93. /*
  94. * List of x86 cond jumps opcodes (. + s8)
  95. * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
  96. */
  97. #define IA32_JB 0x72
  98. #define IA32_JAE 0x73
  99. #define IA32_JE 0x74
  100. #define IA32_JNE 0x75
  101. #define IA32_JBE 0x76
  102. #define IA32_JA 0x77
  103. #define IA32_JL 0x7C
  104. #define IA32_JGE 0x7D
  105. #define IA32_JLE 0x7E
  106. #define IA32_JG 0x7F
  107. /*
  108. * Map eBPF registers to IA32 32bit registers or stack scratch space.
  109. *
  110. * 1. All the registers, R0-R10, are mapped to scratch space on stack.
  111. * 2. We need two 64 bit temp registers to do complex operations on eBPF
  112. * registers.
* 3. For performance reasons, BPF_REG_AX, used for constant blinding, is
* mapped to the real hardware register pair IA32_ESI and IA32_EDI.
*
* As the eBPF registers are all 64 bit and IA32 has only 32 bit registers,
* each eBPF register is mapped to two IA32 32 bit registers or to scratch
* memory, and 64 bit values are built from those two halves.
  119. *
  120. * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
  121. */
  122. static const u8 bpf2ia32[][2] = {
  123. /* Return value from in-kernel function, and exit value from eBPF */
  124. [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
  125. /* The arguments from eBPF program to in-kernel function */
  126. /* Stored on stack scratch space */
  127. [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
  128. [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
  129. [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
  130. [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
  131. [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
  132. /* Callee saved registers that in-kernel function will preserve */
  133. /* Stored on stack scratch space */
  134. [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
  135. [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
  136. [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
  137. [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
  138. /* Read only Frame Pointer to access Stack */
  139. [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
  140. /* Temporary register for blinding constants. */
  141. [BPF_REG_AX] = {IA32_ESI, IA32_EDI},
  142. /* Tail call count. Stored on stack scratch space. */
  143. [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
  144. };
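/*
 * Illustrative example: with the table above, the 64-bit value of
 * BPF_REG_1 lives in two 4-byte stack slots, [ebp + STACK_VAR(8)] for
 * the low half and [ebp + STACK_VAR(12)] for the high half, while
 * BPF_REG_AX is the only eBPF register kept entirely in hardware
 * registers (ESI = low 32 bits, EDI = high 32 bits).
 */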
  145. #define dst_lo dst[0]
  146. #define dst_hi dst[1]
  147. #define src_lo src[0]
  148. #define src_hi src[1]
  149. #define STACK_ALIGNMENT 8
  150. /*
* Stack space for BPF_REG_0 through BPF_REG_9, BPF_REG_FP and the tail
* call count: 12 registers * 8 bytes = 96 bytes.  BPF_REG_AX lives in
* IA32_ESI/IA32_EDI and needs no slot.
  154. */
  155. #define SCRATCH_SIZE 96
  156. /* Total stack size used in JITed code */
  157. #define _STACK_SIZE (stack_depth + SCRATCH_SIZE)
  158. #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
  159. /* Get the offset of eBPF REGISTERs stored on scratch space. */
  160. #define STACK_VAR(off) (off)
  161. /* Encode 'dst_reg' register into IA32 opcode 'byte' */
  162. static u8 add_1reg(u8 byte, u32 dst_reg)
  163. {
  164. return byte + dst_reg;
  165. }
  166. /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
  167. static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
  168. {
  169. return byte + dst_reg + (src_reg << 3);
  170. }
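/*
 * Worked ModR/M example (illustrative):
 * add_2reg(0x40, IA32_EBP, IA32_EAX) = 0x40 + 5 + (0 << 3) = 0x45, so
 *	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), off);
 * emits 8B 45 <off>, i.e. "mov eax,dword ptr [ebp+off]" with an 8-bit
 * displacement.  The 0xC0 base selects the register-register form, e.g.
 * add_2reg(0xC0, IA32_ECX, IA32_EAX) = 0xC1, which together with opcode
 * 0x89 gives "mov ecx,eax".
 */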
  171. static void jit_fill_hole(void *area, unsigned int size)
  172. {
  173. /* Fill whole space with int3 instructions */
  174. memset(area, 0xcc, size);
  175. }
  176. static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
  177. u8 **pprog)
  178. {
  179. u8 *prog = *pprog;
  180. int cnt = 0;
  181. if (dstk) {
  182. if (val == 0) {
  183. /* xor eax,eax */
  184. EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
  185. /* mov dword ptr [ebp+off],eax */
  186. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
  187. STACK_VAR(dst));
  188. } else {
  189. EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
  190. STACK_VAR(dst), val);
  191. }
  192. } else {
  193. if (val == 0)
  194. EMIT2(0x33, add_2reg(0xC0, dst, dst));
  195. else
  196. EMIT2_off32(0xC7, add_1reg(0xC0, dst),
  197. val);
  198. }
  199. *pprog = prog;
  200. }
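/*
 * Usage sketch (illustrative, assumes a local 'prog' as in the callers
 * below): with dstk == true the immediate goes straight to the stack
 * slot, so a zero becomes "xor eax,eax; mov dword ptr [ebp+off],eax".
 * For a hardware-mapped destination such as BPF_REG_AX's low half,
 * emit_ia32_mov_i(IA32_ESI, 0, false, &prog) emits just "xor esi,esi".
 */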
/* dst = src (4 bytes) */
  202. static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
  203. bool sstk, u8 **pprog)
  204. {
  205. u8 *prog = *pprog;
  206. int cnt = 0;
  207. u8 sreg = sstk ? IA32_EAX : src;
  208. if (sstk)
  209. /* mov eax,dword ptr [ebp+off] */
  210. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
  211. if (dstk)
  212. /* mov dword ptr [ebp+off],eax */
  213. EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
  214. else
  215. /* mov dst,sreg */
  216. EMIT2(0x89, add_2reg(0xC0, dst, sreg));
  217. *pprog = prog;
  218. }
  219. /* dst = src */
  220. static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
  221. const u8 src[], bool dstk,
  222. bool sstk, u8 **pprog)
  223. {
  224. emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
  225. if (is64)
  226. /* complete 8 byte move */
  227. emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
  228. else
  229. /* zero out high 4 bytes */
  230. emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
  231. }
  232. /* Sign extended move */
  233. static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
  234. const u32 val, bool dstk, u8 **pprog)
  235. {
  236. u32 hi = 0;
  237. if (is64 && (val & (1<<31)))
  238. hi = (u32)~0;
  239. emit_ia32_mov_i(dst_lo, val, dstk, pprog);
  240. emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
  241. }
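/*
 * Worked example (illustrative): for BPF_ALU64 | BPF_MOV | BPF_K with
 * imm = -1, bit 31 of 'val' is set, so hi becomes 0xffffffff and the
 * destination ends up as 0xffffffffffffffff.  For imm = 1 the high half
 * stays 0, giving 0x0000000000000001.  32-bit moves (is64 == false)
 * always clear the high half.
 */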
  242. /*
  243. * ALU operation (32 bit)
  244. * dst = dst * src
  245. */
  246. static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
  247. bool sstk, u8 **pprog)
  248. {
  249. u8 *prog = *pprog;
  250. int cnt = 0;
  251. u8 sreg = sstk ? IA32_ECX : src;
  252. if (sstk)
  253. /* mov ecx,dword ptr [ebp+off] */
  254. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
  255. if (dstk)
  256. /* mov eax,dword ptr [ebp+off] */
  257. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
  258. else
  259. /* mov eax,dst */
  260. EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
  261. EMIT2(0xF7, add_1reg(0xE0, sreg));
  262. if (dstk)
  263. /* mov dword ptr [ebp+off],eax */
  264. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
  265. STACK_VAR(dst));
  266. else
  267. /* mov dst,eax */
  268. EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
  269. *pprog = prog;
  270. }
  271. static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
  272. bool dstk, u8 **pprog)
  273. {
  274. u8 *prog = *pprog;
  275. int cnt = 0;
  276. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  277. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  278. if (dstk && val != 64) {
  279. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  280. STACK_VAR(dst_lo));
  281. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  282. STACK_VAR(dst_hi));
  283. }
  284. switch (val) {
  285. case 16:
  286. /*
  287. * Emit 'movzwl eax,ax' to zero extend 16-bit
  288. * into 64 bit
  289. */
  290. EMIT2(0x0F, 0xB7);
  291. EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
  292. /* xor dreg_hi,dreg_hi */
  293. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  294. break;
  295. case 32:
  296. /* xor dreg_hi,dreg_hi */
  297. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  298. break;
  299. case 64:
  300. /* nop */
  301. break;
  302. }
  303. if (dstk && val != 64) {
  304. /* mov dword ptr [ebp+off],dreg_lo */
  305. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  306. STACK_VAR(dst_lo));
  307. /* mov dword ptr [ebp+off],dreg_hi */
  308. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  309. STACK_VAR(dst_hi));
  310. }
  311. *pprog = prog;
  312. }
  313. static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
  314. bool dstk, u8 **pprog)
  315. {
  316. u8 *prog = *pprog;
  317. int cnt = 0;
  318. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  319. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  320. if (dstk) {
  321. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  322. STACK_VAR(dst_lo));
  323. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  324. STACK_VAR(dst_hi));
  325. }
  326. switch (val) {
  327. case 16:
  328. /* Emit 'ror %ax, 8' to swap lower 2 bytes */
  329. EMIT1(0x66);
  330. EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
  331. EMIT2(0x0F, 0xB7);
  332. EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
  333. /* xor dreg_hi,dreg_hi */
  334. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  335. break;
  336. case 32:
  337. /* Emit 'bswap eax' to swap lower 4 bytes */
  338. EMIT1(0x0F);
  339. EMIT1(add_1reg(0xC8, dreg_lo));
  340. /* xor dreg_hi,dreg_hi */
  341. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  342. break;
  343. case 64:
  344. /* Emit 'bswap eax' to swap lower 4 bytes */
  345. EMIT1(0x0F);
  346. EMIT1(add_1reg(0xC8, dreg_lo));
  347. /* Emit 'bswap edx' to swap lower 4 bytes */
  348. EMIT1(0x0F);
  349. EMIT1(add_1reg(0xC8, dreg_hi));
  350. /* mov ecx,dreg_hi */
  351. EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
  352. /* mov dreg_hi,dreg_lo */
  353. EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
  354. /* mov dreg_lo,ecx */
  355. EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
  356. break;
  357. }
  358. if (dstk) {
  359. /* mov dword ptr [ebp+off],dreg_lo */
  360. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  361. STACK_VAR(dst_lo));
  362. /* mov dword ptr [ebp+off],dreg_hi */
  363. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  364. STACK_VAR(dst_hi));
  365. }
  366. *pprog = prog;
  367. }
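/*
 * Worked example (illustrative): a 32-bit BPF_TO_BE conversion of
 * dst = 0x0000000011223344 bswaps the low register and clears the high
 * one, leaving 0x0000000044332211.  The 16-bit case rotates the low
 * word by 8 bits and zero-extends it; the 64-bit case bswaps both
 * halves and then exchanges them through ECX.
 */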
  368. /*
  369. * ALU operation (32 bit)
  370. * dst = dst (div|mod) src
  371. */
  372. static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
  373. bool dstk, bool sstk, u8 **pprog)
  374. {
  375. u8 *prog = *pprog;
  376. int cnt = 0;
  377. if (sstk)
  378. /* mov ecx,dword ptr [ebp+off] */
  379. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
  380. STACK_VAR(src));
  381. else if (src != IA32_ECX)
  382. /* mov ecx,src */
  383. EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
  384. if (dstk)
  385. /* mov eax,dword ptr [ebp+off] */
  386. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  387. STACK_VAR(dst));
  388. else
  389. /* mov eax,dst */
  390. EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
  391. /* xor edx,edx */
  392. EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
  393. /* div ecx */
  394. EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
  395. if (op == BPF_MOD) {
  396. if (dstk)
  397. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
  398. STACK_VAR(dst));
  399. else
  400. EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
  401. } else {
  402. if (dstk)
  403. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
  404. STACK_VAR(dst));
  405. else
  406. EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
  407. }
  408. *pprog = prog;
  409. }
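/*
 * Worked example (illustrative): EDX is zeroed before "div ecx", so this
 * is an unsigned 32-bit division of EDX:EAX by ECX.  For dst = 7 and
 * src = 2 the divide leaves the quotient 3 in EAX and the remainder 1 in
 * EDX; BPF_DIV writes EAX back to dst, BPF_MOD writes EDX.
 */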
  410. /*
  411. * ALU operation (32 bit)
  412. * dst = dst (shift) src
  413. */
  414. static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
  415. bool dstk, bool sstk, u8 **pprog)
  416. {
  417. u8 *prog = *pprog;
  418. int cnt = 0;
  419. u8 dreg = dstk ? IA32_EAX : dst;
  420. u8 b2;
  421. if (dstk)
  422. /* mov eax,dword ptr [ebp+off] */
  423. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
  424. if (sstk)
  425. /* mov ecx,dword ptr [ebp+off] */
  426. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
  427. else if (src != IA32_ECX)
  428. /* mov ecx,src */
  429. EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
  430. switch (op) {
  431. case BPF_LSH:
  432. b2 = 0xE0; break;
  433. case BPF_RSH:
  434. b2 = 0xE8; break;
  435. case BPF_ARSH:
  436. b2 = 0xF8; break;
  437. default:
  438. return;
  439. }
  440. EMIT2(0xD3, add_1reg(b2, dreg));
  441. if (dstk)
  442. /* mov dword ptr [ebp+off],dreg */
  443. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
  444. *pprog = prog;
  445. }
  446. /*
  447. * ALU operation (32 bit)
  448. * dst = dst (op) src
  449. */
  450. static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
  451. const u8 dst, const u8 src, bool dstk,
  452. bool sstk, u8 **pprog)
  453. {
  454. u8 *prog = *pprog;
  455. int cnt = 0;
  456. u8 sreg = sstk ? IA32_EAX : src;
  457. u8 dreg = dstk ? IA32_EDX : dst;
  458. if (sstk)
  459. /* mov eax,dword ptr [ebp+off] */
  460. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
  461. if (dstk)
  462. /* mov eax,dword ptr [ebp+off] */
  463. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
  464. switch (BPF_OP(op)) {
  465. /* dst = dst + src */
  466. case BPF_ADD:
  467. if (hi && is64)
  468. EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
  469. else
  470. EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
  471. break;
  472. /* dst = dst - src */
  473. case BPF_SUB:
  474. if (hi && is64)
  475. EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
  476. else
  477. EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
  478. break;
  479. /* dst = dst | src */
  480. case BPF_OR:
  481. EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
  482. break;
  483. /* dst = dst & src */
  484. case BPF_AND:
  485. EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
  486. break;
  487. /* dst = dst ^ src */
  488. case BPF_XOR:
  489. EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
  490. break;
  491. }
  492. if (dstk)
  493. /* mov dword ptr [ebp+off],dreg */
  494. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
  495. STACK_VAR(dst));
  496. *pprog = prog;
  497. }
  498. /* ALU operation (64 bit) */
  499. static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
  500. const u8 dst[], const u8 src[],
  501. bool dstk, bool sstk,
  502. u8 **pprog)
  503. {
  504. u8 *prog = *pprog;
  505. emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
  506. if (is64)
  507. emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
  508. &prog);
  509. else
  510. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  511. *pprog = prog;
  512. }
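/*
 * Worked example (illustrative): a 64-bit BPF_ADD is emitted as "add" on
 * the low halves followed by "adc" (opcode 0x11) on the high halves, so
 * a carry out of the low half propagates.  Adding src = 1 to
 * dst = 0x00000000ffffffff first gives dst_lo = 0 with CF = 1, and the
 * adc then makes dst_hi = 1: the correct sum 0x0000000100000000.
 * BPF_SUB uses "sbb" (opcode 0x19) the same way.
 */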
  513. /*
  514. * ALU operation (32 bit)
  515. * dst = dst (op) val
  516. */
  517. static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
  518. const u8 dst, const s32 val, bool dstk,
  519. u8 **pprog)
  520. {
  521. u8 *prog = *pprog;
  522. int cnt = 0;
  523. u8 dreg = dstk ? IA32_EAX : dst;
  524. u8 sreg = IA32_EDX;
  525. if (dstk)
  526. /* mov eax,dword ptr [ebp+off] */
  527. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
  528. if (!is_imm8(val))
  529. /* mov edx,imm32*/
  530. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
  531. switch (op) {
  532. /* dst = dst + val */
  533. case BPF_ADD:
  534. if (hi && is64) {
  535. if (is_imm8(val))
  536. EMIT3(0x83, add_1reg(0xD0, dreg), val);
  537. else
  538. EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
  539. } else {
  540. if (is_imm8(val))
  541. EMIT3(0x83, add_1reg(0xC0, dreg), val);
  542. else
  543. EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
  544. }
  545. break;
  546. /* dst = dst - val */
  547. case BPF_SUB:
  548. if (hi && is64) {
  549. if (is_imm8(val))
  550. EMIT3(0x83, add_1reg(0xD8, dreg), val);
  551. else
  552. EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
  553. } else {
  554. if (is_imm8(val))
  555. EMIT3(0x83, add_1reg(0xE8, dreg), val);
  556. else
  557. EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
  558. }
  559. break;
  560. /* dst = dst | val */
  561. case BPF_OR:
  562. if (is_imm8(val))
  563. EMIT3(0x83, add_1reg(0xC8, dreg), val);
  564. else
  565. EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
  566. break;
  567. /* dst = dst & val */
  568. case BPF_AND:
  569. if (is_imm8(val))
  570. EMIT3(0x83, add_1reg(0xE0, dreg), val);
  571. else
  572. EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
  573. break;
  574. /* dst = dst ^ val */
  575. case BPF_XOR:
  576. if (is_imm8(val))
  577. EMIT3(0x83, add_1reg(0xF0, dreg), val);
  578. else
  579. EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
  580. break;
  581. case BPF_NEG:
  582. EMIT2(0xF7, add_1reg(0xD8, dreg));
  583. break;
  584. }
  585. if (dstk)
  586. /* mov dword ptr [ebp+off],dreg */
  587. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
  588. STACK_VAR(dst));
  589. *pprog = prog;
  590. }
  591. /* ALU operation (64 bit) */
  592. static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
  593. const u8 dst[], const u32 val,
  594. bool dstk, u8 **pprog)
  595. {
  596. u8 *prog = *pprog;
  597. u32 hi = 0;
  598. if (is64 && (val & (1<<31)))
  599. hi = (u32)~0;
  600. emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
  601. if (is64)
  602. emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
  603. else
  604. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  605. *pprog = prog;
  606. }
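/*
 * Worked example (illustrative): for BPF_ALU64 | BPF_ADD | BPF_K with
 * imm = -1, 'hi' is sign-extended to 0xffffffff; the low half gets
 * "add dreg,-1" (imm8 form, 0x83 /0) and the high half gets
 * "adc dreg,-1" (0x83 /2), which together subtract 1 from the full
 * 64-bit destination.
 */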
  607. /* dst = ~dst (64 bit) */
  608. static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
  609. {
  610. u8 *prog = *pprog;
  611. int cnt = 0;
  612. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  613. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  614. if (dstk) {
  615. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  616. STACK_VAR(dst_lo));
  617. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  618. STACK_VAR(dst_hi));
  619. }
/* neg dreg_lo */
EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
/* adc dreg_hi,0x0 */
EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
/* neg dreg_hi */
EMIT2(0xF7, add_1reg(0xD8, dreg_hi));
  632. if (dstk) {
  633. /* mov dword ptr [ebp+off],dreg_lo */
  634. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  635. STACK_VAR(dst_lo));
  636. /* mov dword ptr [ebp+off],dreg_hi */
  637. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  638. STACK_VAR(dst_hi));
  639. }
  640. *pprog = prog;
  641. }
  642. /* dst = dst << src */
  643. static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
  644. bool dstk, bool sstk, u8 **pprog)
  645. {
  646. u8 *prog = *pprog;
  647. int cnt = 0;
  648. static int jmp_label1 = -1;
  649. static int jmp_label2 = -1;
  650. static int jmp_label3 = -1;
  651. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  652. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  653. if (dstk) {
  654. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  655. STACK_VAR(dst_lo));
  656. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  657. STACK_VAR(dst_hi));
  658. }
  659. if (sstk)
  660. /* mov ecx,dword ptr [ebp+off] */
  661. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
  662. STACK_VAR(src_lo));
  663. else
  664. /* mov ecx,src_lo */
  665. EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
  666. /* cmp ecx,32 */
  667. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
  668. /* Jumps when >= 32 */
  669. if (is_imm8(jmp_label(jmp_label1, 2)))
  670. EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
  671. else
  672. EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
  673. /* < 32 */
  674. /* shl dreg_hi,cl */
  675. EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
  676. /* mov ebx,dreg_lo */
  677. EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
  678. /* shl dreg_lo,cl */
  679. EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
  680. /* IA32_ECX = -IA32_ECX + 32 */
  681. /* neg ecx */
  682. EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
  683. /* add ecx,32 */
  684. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
  685. /* shr ebx,cl */
  686. EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
  687. /* or dreg_hi,ebx */
  688. EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
  689. /* goto out; */
  690. if (is_imm8(jmp_label(jmp_label3, 2)))
  691. EMIT2(0xEB, jmp_label(jmp_label3, 2));
  692. else
  693. EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
  694. /* >= 32 */
  695. if (jmp_label1 == -1)
  696. jmp_label1 = cnt;
  697. /* cmp ecx,64 */
  698. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
  699. /* Jumps when >= 64 */
  700. if (is_imm8(jmp_label(jmp_label2, 2)))
  701. EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
  702. else
  703. EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
  704. /* >= 32 && < 64 */
  705. /* sub ecx,32 */
  706. EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
  707. /* shl dreg_lo,cl */
  708. EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
  709. /* mov dreg_hi,dreg_lo */
  710. EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
  711. /* xor dreg_lo,dreg_lo */
  712. EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
  713. /* goto out; */
  714. if (is_imm8(jmp_label(jmp_label3, 2)))
  715. EMIT2(0xEB, jmp_label(jmp_label3, 2));
  716. else
  717. EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
  718. /* >= 64 */
  719. if (jmp_label2 == -1)
  720. jmp_label2 = cnt;
  721. /* xor dreg_lo,dreg_lo */
  722. EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
  723. /* xor dreg_hi,dreg_hi */
  724. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  725. if (jmp_label3 == -1)
  726. jmp_label3 = cnt;
  727. if (dstk) {
  728. /* mov dword ptr [ebp+off],dreg_lo */
  729. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  730. STACK_VAR(dst_lo));
  731. /* mov dword ptr [ebp+off],dreg_hi */
  732. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  733. STACK_VAR(dst_hi));
  734. }
  735. /* out: */
  736. *pprog = prog;
  737. }
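/*
 * Worked example (illustrative): the three branches above handle shift
 * counts of 0-31, 32-63 and 64 or more.  For a count of 40 the middle
 * branch runs: ECX is reduced to 8, dreg_lo is shifted left by 8 and
 * copied into dreg_hi, and dreg_lo is cleared, i.e.
 * dst = (u64)dst_lo << 40.  Counts of 64 or more clear both halves.
 */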
  738. /* dst = dst >> src (signed)*/
  739. static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
  740. bool dstk, bool sstk, u8 **pprog)
  741. {
  742. u8 *prog = *pprog;
  743. int cnt = 0;
  744. static int jmp_label1 = -1;
  745. static int jmp_label2 = -1;
  746. static int jmp_label3 = -1;
  747. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  748. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  749. if (dstk) {
  750. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  751. STACK_VAR(dst_lo));
  752. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  753. STACK_VAR(dst_hi));
  754. }
  755. if (sstk)
  756. /* mov ecx,dword ptr [ebp+off] */
  757. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
  758. STACK_VAR(src_lo));
  759. else
  760. /* mov ecx,src_lo */
  761. EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
  762. /* cmp ecx,32 */
  763. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
  764. /* Jumps when >= 32 */
  765. if (is_imm8(jmp_label(jmp_label1, 2)))
  766. EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
  767. else
  768. EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
  769. /* < 32 */
  770. /* lshr dreg_lo,cl */
  771. EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
  772. /* mov ebx,dreg_hi */
  773. EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
  774. /* ashr dreg_hi,cl */
  775. EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
  776. /* IA32_ECX = -IA32_ECX + 32 */
  777. /* neg ecx */
  778. EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
  779. /* add ecx,32 */
  780. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
  781. /* shl ebx,cl */
  782. EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
  783. /* or dreg_lo,ebx */
  784. EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
  785. /* goto out; */
  786. if (is_imm8(jmp_label(jmp_label3, 2)))
  787. EMIT2(0xEB, jmp_label(jmp_label3, 2));
  788. else
  789. EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
  790. /* >= 32 */
  791. if (jmp_label1 == -1)
  792. jmp_label1 = cnt;
  793. /* cmp ecx,64 */
  794. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
  795. /* Jumps when >= 64 */
  796. if (is_imm8(jmp_label(jmp_label2, 2)))
  797. EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
  798. else
  799. EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
  800. /* >= 32 && < 64 */
  801. /* sub ecx,32 */
  802. EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
  803. /* ashr dreg_hi,cl */
  804. EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
  805. /* mov dreg_lo,dreg_hi */
  806. EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
  807. /* ashr dreg_hi,imm8 */
  808. EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
  809. /* goto out; */
  810. if (is_imm8(jmp_label(jmp_label3, 2)))
  811. EMIT2(0xEB, jmp_label(jmp_label3, 2));
  812. else
  813. EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
  814. /* >= 64 */
  815. if (jmp_label2 == -1)
  816. jmp_label2 = cnt;
  817. /* ashr dreg_hi,imm8 */
  818. EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
  819. /* mov dreg_lo,dreg_hi */
  820. EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
  821. if (jmp_label3 == -1)
  822. jmp_label3 = cnt;
  823. if (dstk) {
  824. /* mov dword ptr [ebp+off],dreg_lo */
  825. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  826. STACK_VAR(dst_lo));
  827. /* mov dword ptr [ebp+off],dreg_hi */
  828. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  829. STACK_VAR(dst_hi));
  830. }
  831. /* out: */
  832. *pprog = prog;
  833. }
  834. /* dst = dst >> src */
  835. static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
  836. bool sstk, u8 **pprog)
  837. {
  838. u8 *prog = *pprog;
  839. int cnt = 0;
  840. static int jmp_label1 = -1;
  841. static int jmp_label2 = -1;
  842. static int jmp_label3 = -1;
  843. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  844. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  845. if (dstk) {
  846. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  847. STACK_VAR(dst_lo));
  848. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  849. STACK_VAR(dst_hi));
  850. }
  851. if (sstk)
  852. /* mov ecx,dword ptr [ebp+off] */
  853. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
  854. STACK_VAR(src_lo));
  855. else
  856. /* mov ecx,src_lo */
  857. EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
  858. /* cmp ecx,32 */
  859. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
  860. /* Jumps when >= 32 */
  861. if (is_imm8(jmp_label(jmp_label1, 2)))
  862. EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
  863. else
  864. EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
  865. /* < 32 */
  866. /* lshr dreg_lo,cl */
  867. EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
  868. /* mov ebx,dreg_hi */
  869. EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
  870. /* shr dreg_hi,cl */
  871. EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
  872. /* IA32_ECX = -IA32_ECX + 32 */
  873. /* neg ecx */
  874. EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
  875. /* add ecx,32 */
  876. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
  877. /* shl ebx,cl */
  878. EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
  879. /* or dreg_lo,ebx */
  880. EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
  881. /* goto out; */
  882. if (is_imm8(jmp_label(jmp_label3, 2)))
  883. EMIT2(0xEB, jmp_label(jmp_label3, 2));
  884. else
  885. EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
  886. /* >= 32 */
  887. if (jmp_label1 == -1)
  888. jmp_label1 = cnt;
  889. /* cmp ecx,64 */
  890. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
  891. /* Jumps when >= 64 */
  892. if (is_imm8(jmp_label(jmp_label2, 2)))
  893. EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
  894. else
  895. EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
  896. /* >= 32 && < 64 */
  897. /* sub ecx,32 */
  898. EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
  899. /* shr dreg_hi,cl */
  900. EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
  901. /* mov dreg_lo,dreg_hi */
  902. EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
  903. /* xor dreg_hi,dreg_hi */
  904. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  905. /* goto out; */
  906. if (is_imm8(jmp_label(jmp_label3, 2)))
  907. EMIT2(0xEB, jmp_label(jmp_label3, 2));
  908. else
  909. EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
  910. /* >= 64 */
  911. if (jmp_label2 == -1)
  912. jmp_label2 = cnt;
  913. /* xor dreg_lo,dreg_lo */
  914. EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
  915. /* xor dreg_hi,dreg_hi */
  916. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  917. if (jmp_label3 == -1)
  918. jmp_label3 = cnt;
  919. if (dstk) {
  920. /* mov dword ptr [ebp+off],dreg_lo */
  921. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  922. STACK_VAR(dst_lo));
  923. /* mov dword ptr [ebp+off],dreg_hi */
  924. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  925. STACK_VAR(dst_hi));
  926. }
  927. /* out: */
  928. *pprog = prog;
  929. }
  930. /* dst = dst << val */
  931. static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
  932. bool dstk, u8 **pprog)
  933. {
  934. u8 *prog = *pprog;
  935. int cnt = 0;
  936. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  937. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  938. if (dstk) {
  939. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  940. STACK_VAR(dst_lo));
  941. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  942. STACK_VAR(dst_hi));
  943. }
  944. /* Do LSH operation */
  945. if (val < 32) {
  946. /* shl dreg_hi,imm8 */
  947. EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
  948. /* mov ebx,dreg_lo */
  949. EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
  950. /* shl dreg_lo,imm8 */
  951. EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
  952. /* IA32_ECX = 32 - val */
  953. /* mov ecx,val */
  954. EMIT2(0xB1, val);
  955. /* movzx ecx,ecx */
  956. EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
  957. /* neg ecx */
  958. EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
  959. /* add ecx,32 */
  960. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
  961. /* shr ebx,cl */
  962. EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
  963. /* or dreg_hi,ebx */
  964. EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
  965. } else if (val >= 32 && val < 64) {
  966. u32 value = val - 32;
  967. /* shl dreg_lo,imm8 */
  968. EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
  969. /* mov dreg_hi,dreg_lo */
  970. EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
  971. /* xor dreg_lo,dreg_lo */
  972. EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
  973. } else {
  974. /* xor dreg_lo,dreg_lo */
  975. EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
  976. /* xor dreg_hi,dreg_hi */
  977. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  978. }
  979. if (dstk) {
  980. /* mov dword ptr [ebp+off],dreg_lo */
  981. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  982. STACK_VAR(dst_lo));
  983. /* mov dword ptr [ebp+off],dreg_hi */
  984. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  985. STACK_VAR(dst_hi));
  986. }
  987. *pprog = prog;
  988. }
  989. /* dst = dst >> val */
  990. static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
  991. bool dstk, u8 **pprog)
  992. {
  993. u8 *prog = *pprog;
  994. int cnt = 0;
  995. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  996. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  997. if (dstk) {
  998. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  999. STACK_VAR(dst_lo));
  1000. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1001. STACK_VAR(dst_hi));
  1002. }
  1003. /* Do RSH operation */
  1004. if (val < 32) {
  1005. /* shr dreg_lo,imm8 */
  1006. EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
  1007. /* mov ebx,dreg_hi */
  1008. EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
  1009. /* shr dreg_hi,imm8 */
  1010. EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
  1011. /* IA32_ECX = 32 - val */
  1012. /* mov ecx,val */
  1013. EMIT2(0xB1, val);
  1014. /* movzx ecx,ecx */
  1015. EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
  1016. /* neg ecx */
  1017. EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
  1018. /* add ecx,32 */
  1019. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
  1020. /* shl ebx,cl */
  1021. EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
  1022. /* or dreg_lo,ebx */
  1023. EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
  1024. } else if (val >= 32 && val < 64) {
  1025. u32 value = val - 32;
  1026. /* shr dreg_hi,imm8 */
  1027. EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
  1028. /* mov dreg_lo,dreg_hi */
  1029. EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
  1030. /* xor dreg_hi,dreg_hi */
  1031. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  1032. } else {
  1033. /* xor dreg_lo,dreg_lo */
  1034. EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
  1035. /* xor dreg_hi,dreg_hi */
  1036. EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
  1037. }
  1038. if (dstk) {
  1039. /* mov dword ptr [ebp+off],dreg_lo */
  1040. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  1041. STACK_VAR(dst_lo));
  1042. /* mov dword ptr [ebp+off],dreg_hi */
  1043. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  1044. STACK_VAR(dst_hi));
  1045. }
  1046. *pprog = prog;
  1047. }
  1048. /* dst = dst >> val (signed) */
  1049. static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
  1050. bool dstk, u8 **pprog)
  1051. {
  1052. u8 *prog = *pprog;
  1053. int cnt = 0;
  1054. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  1055. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  1056. if (dstk) {
  1057. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1058. STACK_VAR(dst_lo));
  1059. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1060. STACK_VAR(dst_hi));
  1061. }
  1062. /* Do RSH operation */
  1063. if (val < 32) {
  1064. /* shr dreg_lo,imm8 */
  1065. EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
  1066. /* mov ebx,dreg_hi */
  1067. EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
  1068. /* ashr dreg_hi,imm8 */
  1069. EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
  1070. /* IA32_ECX = 32 - val */
  1071. /* mov ecx,val */
  1072. EMIT2(0xB1, val);
  1073. /* movzx ecx,ecx */
  1074. EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
  1075. /* neg ecx */
  1076. EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
  1077. /* add ecx,32 */
  1078. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
  1079. /* shl ebx,cl */
  1080. EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
  1081. /* or dreg_lo,ebx */
  1082. EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
  1083. } else if (val >= 32 && val < 64) {
  1084. u32 value = val - 32;
  1085. /* ashr dreg_hi,imm8 */
  1086. EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
  1087. /* mov dreg_lo,dreg_hi */
  1088. EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
  1089. /* ashr dreg_hi,imm8 */
  1090. EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
  1091. } else {
  1092. /* ashr dreg_hi,imm8 */
  1093. EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
  1094. /* mov dreg_lo,dreg_hi */
  1095. EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
  1096. }
  1097. if (dstk) {
  1098. /* mov dword ptr [ebp+off],dreg_lo */
  1099. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
  1100. STACK_VAR(dst_lo));
  1101. /* mov dword ptr [ebp+off],dreg_hi */
  1102. EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
  1103. STACK_VAR(dst_hi));
  1104. }
  1105. *pprog = prog;
  1106. }
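/*
 * Worked example (illustrative): for an arithmetic right shift by a
 * constant in the 32-63 range, the high half is shifted by (val - 32),
 * copied into the low half, and then refilled with the sign bit
 * ("ashr dreg_hi,31").  So 0xffffffff00000000 arsh 32 gives
 * 0xffffffffffffffff, while 0x7fffffff00000000 arsh 32 gives
 * 0x000000007fffffff.
 */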
  1107. static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
  1108. bool sstk, u8 **pprog)
  1109. {
  1110. u8 *prog = *pprog;
  1111. int cnt = 0;
  1112. if (dstk)
  1113. /* mov eax,dword ptr [ebp+off] */
  1114. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1115. STACK_VAR(dst_hi));
  1116. else
  1117. /* mov eax,dst_hi */
  1118. EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
  1119. if (sstk)
  1120. /* mul dword ptr [ebp+off] */
  1121. EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
  1122. else
  1123. /* mul src_lo */
  1124. EMIT2(0xF7, add_1reg(0xE0, src_lo));
  1125. /* mov ecx,eax */
  1126. EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
  1127. if (dstk)
  1128. /* mov eax,dword ptr [ebp+off] */
  1129. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1130. STACK_VAR(dst_lo));
  1131. else
  1132. /* mov eax,dst_lo */
  1133. EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
  1134. if (sstk)
  1135. /* mul dword ptr [ebp+off] */
  1136. EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
  1137. else
  1138. /* mul src_hi */
  1139. EMIT2(0xF7, add_1reg(0xE0, src_hi));
/* add ecx,eax */
  1141. EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
  1142. if (dstk)
  1143. /* mov eax,dword ptr [ebp+off] */
  1144. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1145. STACK_VAR(dst_lo));
  1146. else
  1147. /* mov eax,dst_lo */
  1148. EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
  1149. if (sstk)
  1150. /* mul dword ptr [ebp+off] */
  1151. EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
  1152. else
  1153. /* mul src_lo */
  1154. EMIT2(0xF7, add_1reg(0xE0, src_lo));
  1155. /* add ecx,edx */
  1156. EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
  1157. if (dstk) {
  1158. /* mov dword ptr [ebp+off],eax */
  1159. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1160. STACK_VAR(dst_lo));
  1161. /* mov dword ptr [ebp+off],ecx */
  1162. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
  1163. STACK_VAR(dst_hi));
  1164. } else {
  1165. /* mov dst_lo,eax */
  1166. EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
  1167. /* mov dst_hi,ecx */
  1168. EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
  1169. }
  1170. *pprog = prog;
  1171. }
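/*
 * Worked decomposition (illustrative): the sequence above computes the
 * low 64 bits of dst * src.  The low half of the result is
 * lo32(dst_lo * src_lo); the high half is the 32-bit sum
 * lo32(dst_hi * src_lo) + lo32(dst_lo * src_hi) + hi32(dst_lo * src_lo),
 * accumulated in ECX, while the final "mul src_lo" leaves the low half
 * in EAX.
 */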
  1172. static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
  1173. bool dstk, u8 **pprog)
  1174. {
  1175. u8 *prog = *pprog;
  1176. int cnt = 0;
  1177. u32 hi;
  1178. hi = val & (1<<31) ? (u32)~0 : 0;
  1179. /* movl eax,imm32 */
  1180. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
  1181. if (dstk)
  1182. /* mul dword ptr [ebp+off] */
  1183. EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
  1184. else
  1185. /* mul dst_hi */
  1186. EMIT2(0xF7, add_1reg(0xE0, dst_hi));
  1187. /* mov ecx,eax */
  1188. EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
  1189. /* movl eax,imm32 */
  1190. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
  1191. if (dstk)
  1192. /* mul dword ptr [ebp+off] */
  1193. EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
  1194. else
  1195. /* mul dst_lo */
  1196. EMIT2(0xF7, add_1reg(0xE0, dst_lo));
  1197. /* add ecx,eax */
  1198. EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
  1199. /* movl eax,imm32 */
  1200. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
  1201. if (dstk)
  1202. /* mul dword ptr [ebp+off] */
  1203. EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
  1204. else
  1205. /* mul dst_lo */
  1206. EMIT2(0xF7, add_1reg(0xE0, dst_lo));
  1207. /* add ecx,edx */
  1208. EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
  1209. if (dstk) {
  1210. /* mov dword ptr [ebp+off],eax */
  1211. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1212. STACK_VAR(dst_lo));
  1213. /* mov dword ptr [ebp+off],ecx */
  1214. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
  1215. STACK_VAR(dst_hi));
  1216. } else {
/* mov dst_lo,eax */
EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
/* mov dst_hi,ecx */
EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
  1221. }
  1222. *pprog = prog;
  1223. }
  1224. static int bpf_size_to_x86_bytes(int bpf_size)
  1225. {
  1226. if (bpf_size == BPF_W)
  1227. return 4;
  1228. else if (bpf_size == BPF_H)
  1229. return 2;
  1230. else if (bpf_size == BPF_B)
  1231. return 1;
  1232. else if (bpf_size == BPF_DW)
  1233. return 4; /* imm32 */
  1234. else
  1235. return 0;
  1236. }
  1237. struct jit_context {
  1238. int cleanup_addr; /* Epilogue code offset */
  1239. };
  1240. /* Maximum number of bytes emitted while JITing one eBPF insn */
  1241. #define BPF_MAX_INSN_SIZE 128
  1242. #define BPF_INSN_SAFETY 64
  1243. #define PROLOGUE_SIZE 35
  1244. /*
* Emit prologue code for BPF program and check its size.
  1246. * bpf_tail_call helper will skip it while jumping into another program.
  1247. */
  1248. static void emit_prologue(u8 **pprog, u32 stack_depth)
  1249. {
  1250. u8 *prog = *pprog;
  1251. int cnt = 0;
  1252. const u8 *r1 = bpf2ia32[BPF_REG_1];
  1253. const u8 fplo = bpf2ia32[BPF_REG_FP][0];
  1254. const u8 fphi = bpf2ia32[BPF_REG_FP][1];
  1255. const u8 *tcc = bpf2ia32[TCALL_CNT];
  1256. /* push ebp */
  1257. EMIT1(0x55);
  1258. /* mov ebp,esp */
  1259. EMIT2(0x89, 0xE5);
  1260. /* push edi */
  1261. EMIT1(0x57);
  1262. /* push esi */
  1263. EMIT1(0x56);
  1264. /* push ebx */
  1265. EMIT1(0x53);
  1266. /* sub esp,STACK_SIZE */
  1267. EMIT2_off32(0x81, 0xEC, STACK_SIZE);
  1268. /* sub ebp,SCRATCH_SIZE+4+12*/
  1269. EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
  1270. /* xor ebx,ebx */
  1271. EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
  1272. /* Set up BPF prog stack base register */
  1273. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
  1274. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
  1275. /* Move BPF_CTX (EAX) to BPF_REG_R1 */
  1276. /* mov dword ptr [ebp+off],eax */
  1277. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
  1278. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
  1279. /* Initialize Tail Count */
  1280. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
  1281. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
  1282. BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
  1283. *pprog = prog;
  1284. }
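/*
 * Frame sketch (illustrative): after the register pushes and
 * "sub esp,STACK_SIZE", EBP is lowered by SCRATCH_SIZE + 16 (12 bytes of
 * saved edi/esi/ebx plus 4 bytes of padding).  The scratch slots are
 * then reached with small positive displacements,
 * [ebp + STACK_VAR(0)] .. [ebp + STACK_VAR(92)], which is why the
 * scratch accesses in this file can use the disp8 ModR/M form (base
 * 0x40).  The same lowered EBP value is stored as BPF_REG_FP, and
 * emit_epilogue() adds the adjustment back before restoring
 * ebx/esi/edi.
 */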
  1285. /* Emit epilogue code for BPF program */
  1286. static void emit_epilogue(u8 **pprog, u32 stack_depth)
  1287. {
  1288. u8 *prog = *pprog;
  1289. const u8 *r0 = bpf2ia32[BPF_REG_0];
  1290. int cnt = 0;
  1291. /* mov eax,dword ptr [ebp+off]*/
  1292. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
  1293. /* mov edx,dword ptr [ebp+off]*/
  1294. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
  1295. /* add ebp,SCRATCH_SIZE+4+12*/
  1296. EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
  1297. /* mov ebx,dword ptr [ebp-12]*/
  1298. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
  1299. /* mov esi,dword ptr [ebp-8]*/
  1300. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
  1301. /* mov edi,dword ptr [ebp-4]*/
  1302. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
  1303. EMIT1(0xC9); /* leave */
  1304. EMIT1(0xC3); /* ret */
  1305. *pprog = prog;
  1306. }
  1307. /*
  1308. * Generate the following code:
  1309. * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
  1310. * if (index >= array->map.max_entries)
  1311. * goto out;
  1312. * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
  1313. * goto out;
  1314. * prog = array->ptrs[index];
  1315. * if (prog == NULL)
  1316. * goto out;
  1317. * goto *(prog->bpf_func + prologue_size);
  1318. * out:
  1319. */
  1320. static void emit_bpf_tail_call(u8 **pprog)
  1321. {
  1322. u8 *prog = *pprog;
  1323. int cnt = 0;
  1324. const u8 *r1 = bpf2ia32[BPF_REG_1];
  1325. const u8 *r2 = bpf2ia32[BPF_REG_2];
  1326. const u8 *r3 = bpf2ia32[BPF_REG_3];
  1327. const u8 *tcc = bpf2ia32[TCALL_CNT];
  1328. u32 lo, hi;
  1329. static int jmp_label1 = -1;
  1330. /*
  1331. * if (index >= array->map.max_entries)
  1332. * goto out;
  1333. */
  1334. /* mov eax,dword ptr [ebp+off] */
  1335. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
  1336. /* mov edx,dword ptr [ebp+off] */
  1337. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
  1338. /* cmp dword ptr [eax+off],edx */
  1339. EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
  1340. offsetof(struct bpf_array, map.max_entries));
  1341. /* jbe out */
  1342. EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
  1343. /*
  1344. * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
  1345. * goto out;
  1346. */
  1347. lo = (u32)MAX_TAIL_CALL_CNT;
  1348. hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
  1349. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
  1350. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
/* cmp ebx,hi */
  1352. EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
  1353. EMIT2(IA32_JNE, 3);
  1354. /* cmp ecx,lo */
  1355. EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
/* jae out */
  1357. EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
/* add ecx,0x1 */
  1359. EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
  1360. /* adc ebx,0x0 */
  1361. EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
/* mov dword ptr [ebp+off],ecx */
  1363. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
/* mov dword ptr [ebp+off],ebx */
  1365. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
  1366. /* prog = array->ptrs[index]; */
  1367. /* mov edx, [eax + edx * 4 + offsetof(...)] */
  1368. EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
  1369. /*
  1370. * if (prog == NULL)
  1371. * goto out;
  1372. */
  1373. /* test edx,edx */
  1374. EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
  1375. /* je out */
  1376. EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
  1377. /* goto *(prog->bpf_func + prologue_size); */
/* mov edx, dword ptr [edx + offsetof(struct bpf_prog, bpf_func)] */
  1379. EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
  1380. offsetof(struct bpf_prog, bpf_func));
  1381. /* add edx,prologue_size */
  1382. EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
  1383. /* mov eax,dword ptr [ebp+off] */
  1384. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
  1385. /*
  1386. * Now we're ready to jump into next BPF program:
  1387. * eax == ctx (1st arg)
  1388. * edx == prog->bpf_func + prologue_size
  1389. */
  1390. RETPOLINE_EDX_BPF_JIT();
  1391. if (jmp_label1 == -1)
  1392. jmp_label1 = cnt;
  1393. /* out: */
  1394. *pprog = prog;
  1395. }
1396. /* Push a 64-bit BPF register (from its scratch stack slots) onto the native stack. */
  1397. static inline void emit_push_r64(const u8 src[], u8 **pprog)
  1398. {
  1399. u8 *prog = *pprog;
  1400. int cnt = 0;
  1401. /* mov ecx,dword ptr [ebp+off] */
  1402. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
  1403. /* push ecx */
  1404. EMIT1(0x51);
  1405. /* mov ecx,dword ptr [ebp+off] */
  1406. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
  1407. /* push ecx */
  1408. EMIT1(0x51);
  1409. *pprog = prog;
  1410. }
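/*
 * emit_push_r64() is used by the BPF_JMP | BPF_CALL case below to pass
 * the 64-bit arguments R2..R5 on the native stack: pushing the high word
 * first leaves the low word at the lower address, the usual i386 layout
 * for 64-bit arguments.
 */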
  1411. static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
  1412. int oldproglen, struct jit_context *ctx)
  1413. {
  1414. struct bpf_insn *insn = bpf_prog->insnsi;
  1415. int insn_cnt = bpf_prog->len;
  1416. bool seen_exit = false;
  1417. u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
  1418. int i, cnt = 0;
  1419. int proglen = 0;
  1420. u8 *prog = temp;
  1421. emit_prologue(&prog, bpf_prog->aux->stack_depth);
  1422. for (i = 0; i < insn_cnt; i++, insn++) {
  1423. const s32 imm32 = insn->imm;
  1424. const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1425. const bool dstk = insn->dst_reg != BPF_REG_AX;
1426. const bool sstk = insn->src_reg != BPF_REG_AX;
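/*
 * dstk/sstk are true when the operand lives in its per-register stack
 * slot rather than in host registers; in this JIT only BPF_REG_AX (the
 * scratch register used for constant blinding) is assumed to be mapped
 * directly to host registers.
 */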
  1427. const u8 code = insn->code;
  1428. const u8 *dst = bpf2ia32[insn->dst_reg];
  1429. const u8 *src = bpf2ia32[insn->src_reg];
  1430. const u8 *r0 = bpf2ia32[BPF_REG_0];
  1431. s64 jmp_offset;
  1432. u8 jmp_cond;
  1433. int ilen;
  1434. u8 *func;
  1435. switch (code) {
  1436. /* ALU operations */
  1437. /* dst = src */
  1438. case BPF_ALU | BPF_MOV | BPF_K:
  1439. case BPF_ALU | BPF_MOV | BPF_X:
  1440. case BPF_ALU64 | BPF_MOV | BPF_K:
  1441. case BPF_ALU64 | BPF_MOV | BPF_X:
  1442. switch (BPF_SRC(code)) {
  1443. case BPF_X:
  1444. emit_ia32_mov_r64(is64, dst, src, dstk,
  1445. sstk, &prog);
  1446. break;
  1447. case BPF_K:
  1448. /* Sign-extend immediate value to dst reg */
  1449. emit_ia32_mov_i64(is64, dst, imm32,
  1450. dstk, &prog);
  1451. break;
  1452. }
  1453. break;
  1454. /* dst = dst + src/imm */
  1455. /* dst = dst - src/imm */
  1456. /* dst = dst | src/imm */
  1457. /* dst = dst & src/imm */
  1458. /* dst = dst ^ src/imm */
  1459. /* dst = dst * src/imm */
  1460. /* dst = dst << src */
  1461. /* dst = dst >> src */
  1462. case BPF_ALU | BPF_ADD | BPF_K:
  1463. case BPF_ALU | BPF_ADD | BPF_X:
  1464. case BPF_ALU | BPF_SUB | BPF_K:
  1465. case BPF_ALU | BPF_SUB | BPF_X:
  1466. case BPF_ALU | BPF_OR | BPF_K:
  1467. case BPF_ALU | BPF_OR | BPF_X:
  1468. case BPF_ALU | BPF_AND | BPF_K:
  1469. case BPF_ALU | BPF_AND | BPF_X:
  1470. case BPF_ALU | BPF_XOR | BPF_K:
  1471. case BPF_ALU | BPF_XOR | BPF_X:
  1472. case BPF_ALU64 | BPF_ADD | BPF_K:
  1473. case BPF_ALU64 | BPF_ADD | BPF_X:
  1474. case BPF_ALU64 | BPF_SUB | BPF_K:
  1475. case BPF_ALU64 | BPF_SUB | BPF_X:
  1476. case BPF_ALU64 | BPF_OR | BPF_K:
  1477. case BPF_ALU64 | BPF_OR | BPF_X:
  1478. case BPF_ALU64 | BPF_AND | BPF_K:
  1479. case BPF_ALU64 | BPF_AND | BPF_X:
  1480. case BPF_ALU64 | BPF_XOR | BPF_K:
  1481. case BPF_ALU64 | BPF_XOR | BPF_X:
  1482. switch (BPF_SRC(code)) {
  1483. case BPF_X:
  1484. emit_ia32_alu_r64(is64, BPF_OP(code), dst,
  1485. src, dstk, sstk, &prog);
  1486. break;
  1487. case BPF_K:
  1488. emit_ia32_alu_i64(is64, BPF_OP(code), dst,
  1489. imm32, dstk, &prog);
  1490. break;
  1491. }
  1492. break;
  1493. case BPF_ALU | BPF_MUL | BPF_K:
  1494. case BPF_ALU | BPF_MUL | BPF_X:
  1495. switch (BPF_SRC(code)) {
  1496. case BPF_X:
  1497. emit_ia32_mul_r(dst_lo, src_lo, dstk,
  1498. sstk, &prog);
  1499. break;
  1500. case BPF_K:
  1501. /* mov ecx,imm32*/
  1502. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
  1503. imm32);
  1504. emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
  1505. false, &prog);
  1506. break;
  1507. }
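/* Per BPF semantics, 32-bit ALU results are zero-extended to 64 bits, hence dst_hi is cleared. */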
  1508. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  1509. break;
  1510. case BPF_ALU | BPF_LSH | BPF_X:
  1511. case BPF_ALU | BPF_RSH | BPF_X:
  1512. case BPF_ALU | BPF_ARSH | BPF_K:
  1513. case BPF_ALU | BPF_ARSH | BPF_X:
  1514. switch (BPF_SRC(code)) {
  1515. case BPF_X:
  1516. emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
  1517. dstk, sstk, &prog);
  1518. break;
  1519. case BPF_K:
  1520. /* mov ecx,imm32*/
  1521. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
  1522. imm32);
  1523. emit_ia32_shift_r(BPF_OP(code), dst_lo,
  1524. IA32_ECX, dstk, false,
  1525. &prog);
  1526. break;
  1527. }
  1528. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  1529. break;
  1530. /* dst = dst / src(imm) */
  1531. /* dst = dst % src(imm) */
  1532. case BPF_ALU | BPF_DIV | BPF_K:
  1533. case BPF_ALU | BPF_DIV | BPF_X:
  1534. case BPF_ALU | BPF_MOD | BPF_K:
  1535. case BPF_ALU | BPF_MOD | BPF_X:
  1536. switch (BPF_SRC(code)) {
  1537. case BPF_X:
  1538. emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
  1539. src_lo, dstk, sstk, &prog);
  1540. break;
  1541. case BPF_K:
  1542. /* mov ecx,imm32*/
  1543. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
  1544. imm32);
  1545. emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
  1546. IA32_ECX, dstk, false,
  1547. &prog);
  1548. break;
  1549. }
  1550. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  1551. break;
  1552. case BPF_ALU64 | BPF_DIV | BPF_K:
  1553. case BPF_ALU64 | BPF_DIV | BPF_X:
  1554. case BPF_ALU64 | BPF_MOD | BPF_K:
  1555. case BPF_ALU64 | BPF_MOD | BPF_X:
  1556. goto notyet;
  1557. /* dst = dst >> imm */
  1558. /* dst = dst << imm */
  1559. case BPF_ALU | BPF_RSH | BPF_K:
  1560. case BPF_ALU | BPF_LSH | BPF_K:
  1561. if (unlikely(imm32 > 31))
  1562. return -EINVAL;
  1563. /* mov ecx,imm32*/
  1564. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
  1565. emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
  1566. false, &prog);
  1567. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  1568. break;
  1569. /* dst = dst << imm */
  1570. case BPF_ALU64 | BPF_LSH | BPF_K:
  1571. if (unlikely(imm32 > 63))
  1572. return -EINVAL;
  1573. emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
  1574. break;
  1575. /* dst = dst >> imm */
  1576. case BPF_ALU64 | BPF_RSH | BPF_K:
  1577. if (unlikely(imm32 > 63))
  1578. return -EINVAL;
  1579. emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
  1580. break;
  1581. /* dst = dst << src */
  1582. case BPF_ALU64 | BPF_LSH | BPF_X:
  1583. emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
  1584. break;
  1585. /* dst = dst >> src */
  1586. case BPF_ALU64 | BPF_RSH | BPF_X:
  1587. emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
  1588. break;
  1589. /* dst = dst >> src (signed) */
  1590. case BPF_ALU64 | BPF_ARSH | BPF_X:
  1591. emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
  1592. break;
  1593. /* dst = dst >> imm (signed) */
  1594. case BPF_ALU64 | BPF_ARSH | BPF_K:
  1595. if (unlikely(imm32 > 63))
  1596. return -EINVAL;
  1597. emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
  1598. break;
1599. /* dst = -dst */
  1600. case BPF_ALU | BPF_NEG:
  1601. emit_ia32_alu_i(is64, false, BPF_OP(code),
  1602. dst_lo, 0, dstk, &prog);
  1603. emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
  1604. break;
1605. /* dst = -dst (64 bit) */
  1606. case BPF_ALU64 | BPF_NEG:
  1607. emit_ia32_neg64(dst, dstk, &prog);
  1608. break;
  1609. /* dst = dst * src/imm */
  1610. case BPF_ALU64 | BPF_MUL | BPF_X:
  1611. case BPF_ALU64 | BPF_MUL | BPF_K:
  1612. switch (BPF_SRC(code)) {
  1613. case BPF_X:
  1614. emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
  1615. break;
  1616. case BPF_K:
  1617. emit_ia32_mul_i64(dst, imm32, dstk, &prog);
  1618. break;
  1619. }
  1620. break;
  1621. /* dst = htole(dst) */
  1622. case BPF_ALU | BPF_END | BPF_FROM_LE:
  1623. emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
  1624. break;
  1625. /* dst = htobe(dst) */
  1626. case BPF_ALU | BPF_END | BPF_FROM_BE:
  1627. emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
  1628. break;
  1629. /* dst = imm64 */
  1630. case BPF_LD | BPF_IMM | BPF_DW: {
  1631. s32 hi, lo = imm32;
  1632. hi = insn[1].imm;
  1633. emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
  1634. emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
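/*
 * A BPF_LD | BPF_IMM | BPF_DW immediate spans two BPF instructions
 * (low word in insn[0].imm, high word in insn[1].imm), so skip the
 * second half here.
 */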
  1635. insn++;
  1636. i++;
  1637. break;
  1638. }
1639. /* ST: *(size *)(dst_reg + off) = imm */
  1640. case BPF_ST | BPF_MEM | BPF_H:
  1641. case BPF_ST | BPF_MEM | BPF_B:
  1642. case BPF_ST | BPF_MEM | BPF_W:
  1643. case BPF_ST | BPF_MEM | BPF_DW:
  1644. if (dstk)
  1645. /* mov eax,dword ptr [ebp+off] */
  1646. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1647. STACK_VAR(dst_lo));
  1648. else
  1649. /* mov eax,dst_lo */
  1650. EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
  1651. switch (BPF_SIZE(code)) {
  1652. case BPF_B:
  1653. EMIT(0xC6, 1); break;
  1654. case BPF_H:
  1655. EMIT2(0x66, 0xC7); break;
  1656. case BPF_W:
  1657. case BPF_DW:
  1658. EMIT(0xC7, 1); break;
  1659. }
  1660. if (is_imm8(insn->off))
  1661. EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
  1662. else
  1663. EMIT1_off32(add_1reg(0x80, IA32_EAX),
  1664. insn->off);
  1665. EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
  1666. if (BPF_SIZE(code) == BPF_DW) {
  1667. u32 hi;
  1668. hi = imm32 & (1<<31) ? (u32)~0 : 0;
  1669. EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
  1670. insn->off + 4);
  1671. EMIT(hi, 4);
  1672. }
  1673. break;
1674. /* STX: *(size *)(dst_reg + off) = src_reg */
  1675. case BPF_STX | BPF_MEM | BPF_B:
  1676. case BPF_STX | BPF_MEM | BPF_H:
  1677. case BPF_STX | BPF_MEM | BPF_W:
  1678. case BPF_STX | BPF_MEM | BPF_DW:
  1679. if (dstk)
  1680. /* mov eax,dword ptr [ebp+off] */
  1681. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1682. STACK_VAR(dst_lo));
  1683. else
  1684. /* mov eax,dst_lo */
  1685. EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
  1686. if (sstk)
  1687. /* mov edx,dword ptr [ebp+off] */
  1688. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1689. STACK_VAR(src_lo));
  1690. else
  1691. /* mov edx,src_lo */
  1692. EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
  1693. switch (BPF_SIZE(code)) {
  1694. case BPF_B:
  1695. EMIT(0x88, 1); break;
  1696. case BPF_H:
  1697. EMIT2(0x66, 0x89); break;
  1698. case BPF_W:
  1699. case BPF_DW:
  1700. EMIT(0x89, 1); break;
  1701. }
  1702. if (is_imm8(insn->off))
  1703. EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
  1704. insn->off);
  1705. else
  1706. EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
  1707. insn->off);
  1708. if (BPF_SIZE(code) == BPF_DW) {
  1709. if (sstk)
1710. /* mov edx,dword ptr [ebp+off] */
  1711. EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
  1712. IA32_EDX),
  1713. STACK_VAR(src_hi));
  1714. else
1715. /* mov edx,src_hi */
  1716. EMIT2(0x8B, add_2reg(0xC0, src_hi,
  1717. IA32_EDX));
  1718. EMIT1(0x89);
  1719. if (is_imm8(insn->off + 4)) {
  1720. EMIT2(add_2reg(0x40, IA32_EAX,
  1721. IA32_EDX),
  1722. insn->off + 4);
  1723. } else {
  1724. EMIT1(add_2reg(0x80, IA32_EAX,
  1725. IA32_EDX));
  1726. EMIT(insn->off + 4, 4);
  1727. }
  1728. }
  1729. break;
1730. /* LDX: dst_reg = *(size *)(src_reg + off) */
  1731. case BPF_LDX | BPF_MEM | BPF_B:
  1732. case BPF_LDX | BPF_MEM | BPF_H:
  1733. case BPF_LDX | BPF_MEM | BPF_W:
  1734. case BPF_LDX | BPF_MEM | BPF_DW:
  1735. if (sstk)
  1736. /* mov eax,dword ptr [ebp+off] */
  1737. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1738. STACK_VAR(src_lo));
  1739. else
1740. /* mov eax,src_lo */
  1741. EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
  1742. switch (BPF_SIZE(code)) {
  1743. case BPF_B:
  1744. EMIT2(0x0F, 0xB6); break;
  1745. case BPF_H:
  1746. EMIT2(0x0F, 0xB7); break;
  1747. case BPF_W:
  1748. case BPF_DW:
  1749. EMIT(0x8B, 1); break;
  1750. }
  1751. if (is_imm8(insn->off))
  1752. EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
  1753. insn->off);
  1754. else
  1755. EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
  1756. insn->off);
  1757. if (dstk)
  1758. /* mov dword ptr [ebp+off],edx */
  1759. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1760. STACK_VAR(dst_lo));
  1761. else
  1762. /* mov dst_lo,edx */
  1763. EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
  1764. switch (BPF_SIZE(code)) {
  1765. case BPF_B:
  1766. case BPF_H:
  1767. case BPF_W:
  1768. if (dstk) {
  1769. EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
  1770. STACK_VAR(dst_hi));
  1771. EMIT(0x0, 4);
  1772. } else {
  1773. EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
  1774. }
  1775. break;
  1776. case BPF_DW:
  1777. EMIT2_off32(0x8B,
  1778. add_2reg(0x80, IA32_EAX, IA32_EDX),
  1779. insn->off + 4);
  1780. if (dstk)
  1781. EMIT3(0x89,
  1782. add_2reg(0x40, IA32_EBP,
  1783. IA32_EDX),
  1784. STACK_VAR(dst_hi));
  1785. else
  1786. EMIT2(0x89,
  1787. add_2reg(0xC0, dst_hi, IA32_EDX));
  1788. break;
  1789. default:
  1790. break;
  1791. }
  1792. break;
  1793. /* call */
  1794. case BPF_JMP | BPF_CALL:
  1795. {
  1796. const u8 *r1 = bpf2ia32[BPF_REG_1];
  1797. const u8 *r2 = bpf2ia32[BPF_REG_2];
  1798. const u8 *r3 = bpf2ia32[BPF_REG_3];
  1799. const u8 *r4 = bpf2ia32[BPF_REG_4];
  1800. const u8 *r5 = bpf2ia32[BPF_REG_5];
  1801. if (insn->src_reg == BPF_PSEUDO_CALL)
  1802. goto notyet;
  1803. func = (u8 *) __bpf_call_base + imm32;
  1804. jmp_offset = func - (image + addrs[i]);
  1805. if (!imm32 || !is_simm32(jmp_offset)) {
  1806. pr_err("unsupported BPF func %d addr %p image %p\n",
  1807. imm32, func, image);
  1808. return -EINVAL;
  1809. }
  1810. /* mov eax,dword ptr [ebp+off] */
  1811. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1812. STACK_VAR(r1[0]));
  1813. /* mov edx,dword ptr [ebp+off] */
  1814. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1815. STACK_VAR(r1[1]));
  1816. emit_push_r64(r5, &prog);
  1817. emit_push_r64(r4, &prog);
  1818. emit_push_r64(r3, &prog);
  1819. emit_push_r64(r2, &prog);
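/*
 * BPF R1 is passed in the eax (low word) / edx (high word) pair; R2..R5
 * were just pushed on the stack. The call displacement below is relative
 * to the end of the call instruction, while jmp_offset was computed
 * against addrs[i], the end of this whole emitted sequence; the 9 bytes
 * emitted after the call (two 3-byte movs plus the 3-byte add esp)
 * account for the difference.
 */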
  1820. EMIT1_off32(0xE8, jmp_offset + 9);
  1821. /* mov dword ptr [ebp+off],eax */
  1822. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1823. STACK_VAR(r0[0]));
  1824. /* mov dword ptr [ebp+off],edx */
  1825. EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1826. STACK_VAR(r0[1]));
  1827. /* add esp,32 */
  1828. EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
  1829. break;
  1830. }
  1831. case BPF_JMP | BPF_TAIL_CALL:
  1832. emit_bpf_tail_call(&prog);
  1833. break;
  1834. /* cond jump */
  1835. case BPF_JMP | BPF_JEQ | BPF_X:
  1836. case BPF_JMP | BPF_JNE | BPF_X:
  1837. case BPF_JMP | BPF_JGT | BPF_X:
  1838. case BPF_JMP | BPF_JLT | BPF_X:
  1839. case BPF_JMP | BPF_JGE | BPF_X:
  1840. case BPF_JMP | BPF_JLE | BPF_X:
  1841. case BPF_JMP | BPF_JSGT | BPF_X:
  1842. case BPF_JMP | BPF_JSLE | BPF_X:
  1843. case BPF_JMP | BPF_JSLT | BPF_X:
  1844. case BPF_JMP | BPF_JSGE | BPF_X: {
  1845. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  1846. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  1847. u8 sreg_lo = sstk ? IA32_ECX : src_lo;
  1848. u8 sreg_hi = sstk ? IA32_EBX : src_hi;
  1849. if (dstk) {
  1850. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1851. STACK_VAR(dst_lo));
  1852. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1853. STACK_VAR(dst_hi));
  1854. }
  1855. if (sstk) {
  1856. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
  1857. STACK_VAR(src_lo));
  1858. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
  1859. STACK_VAR(src_hi));
  1860. }
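/*
 * 64-bit compare in two steps: if the high words differ, the 2-byte jne
 * skips the low-word cmp so the conditional jump at emit_cond_jmp is
 * taken on the high-word flags; otherwise the low-word cmp decides.
 */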
  1861. /* cmp dreg_hi,sreg_hi */
  1862. EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
  1863. EMIT2(IA32_JNE, 2);
  1864. /* cmp dreg_lo,sreg_lo */
  1865. EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
  1866. goto emit_cond_jmp;
  1867. }
  1868. case BPF_JMP | BPF_JSET | BPF_X: {
  1869. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  1870. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  1871. u8 sreg_lo = sstk ? IA32_ECX : src_lo;
  1872. u8 sreg_hi = sstk ? IA32_EBX : src_hi;
  1873. if (dstk) {
  1874. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1875. STACK_VAR(dst_lo));
  1876. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1877. STACK_VAR(dst_hi));
  1878. }
  1879. if (sstk) {
  1880. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
  1881. STACK_VAR(src_lo));
  1882. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
  1883. STACK_VAR(src_hi));
  1884. }
  1885. /* and dreg_lo,sreg_lo */
  1886. EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
  1887. /* and dreg_hi,sreg_hi */
  1888. EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
  1889. /* or dreg_lo,dreg_hi */
  1890. EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
  1891. goto emit_cond_jmp;
  1892. }
  1893. case BPF_JMP | BPF_JSET | BPF_K: {
  1894. u32 hi;
  1895. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  1896. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  1897. u8 sreg_lo = IA32_ECX;
  1898. u8 sreg_hi = IA32_EBX;
  1899. if (dstk) {
  1900. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1901. STACK_VAR(dst_lo));
  1902. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1903. STACK_VAR(dst_hi));
  1904. }
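/* The s32 immediate is sign-extended to 64 bits, so the high word is all ones when bit 31 of imm32 is set. */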
  1905. hi = imm32 & (1<<31) ? (u32)~0 : 0;
  1906. /* mov ecx,imm32 */
  1907. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
  1908. /* mov ebx,imm32 */
  1909. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
  1910. /* and dreg_lo,sreg_lo */
  1911. EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
  1912. /* and dreg_hi,sreg_hi */
  1913. EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
  1914. /* or dreg_lo,dreg_hi */
  1915. EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
  1916. goto emit_cond_jmp;
  1917. }
  1918. case BPF_JMP | BPF_JEQ | BPF_K:
  1919. case BPF_JMP | BPF_JNE | BPF_K:
  1920. case BPF_JMP | BPF_JGT | BPF_K:
  1921. case BPF_JMP | BPF_JLT | BPF_K:
  1922. case BPF_JMP | BPF_JGE | BPF_K:
  1923. case BPF_JMP | BPF_JLE | BPF_K:
  1924. case BPF_JMP | BPF_JSGT | BPF_K:
  1925. case BPF_JMP | BPF_JSLE | BPF_K:
  1926. case BPF_JMP | BPF_JSLT | BPF_K:
  1927. case BPF_JMP | BPF_JSGE | BPF_K: {
  1928. u32 hi;
  1929. u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
  1930. u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
  1931. u8 sreg_lo = IA32_ECX;
  1932. u8 sreg_hi = IA32_EBX;
  1933. if (dstk) {
  1934. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
  1935. STACK_VAR(dst_lo));
  1936. EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
  1937. STACK_VAR(dst_hi));
  1938. }
  1939. hi = imm32 & (1<<31) ? (u32)~0 : 0;
  1940. /* mov ecx,imm32 */
  1941. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
  1942. /* mov ebx,imm32 */
  1943. EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
  1944. /* cmp dreg_hi,sreg_hi */
  1945. EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
  1946. EMIT2(IA32_JNE, 2);
  1947. /* cmp dreg_lo,sreg_lo */
  1948. EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
  1949. emit_cond_jmp: /* Convert BPF opcode to x86 */
  1950. switch (BPF_OP(code)) {
  1951. case BPF_JEQ:
  1952. jmp_cond = IA32_JE;
  1953. break;
  1954. case BPF_JSET:
  1955. case BPF_JNE:
  1956. jmp_cond = IA32_JNE;
  1957. break;
  1958. case BPF_JGT:
  1959. /* GT is unsigned '>', JA in x86 */
  1960. jmp_cond = IA32_JA;
  1961. break;
  1962. case BPF_JLT:
  1963. /* LT is unsigned '<', JB in x86 */
  1964. jmp_cond = IA32_JB;
  1965. break;
  1966. case BPF_JGE:
  1967. /* GE is unsigned '>=', JAE in x86 */
  1968. jmp_cond = IA32_JAE;
  1969. break;
  1970. case BPF_JLE:
  1971. /* LE is unsigned '<=', JBE in x86 */
  1972. jmp_cond = IA32_JBE;
  1973. break;
  1974. case BPF_JSGT:
  1975. /* Signed '>', GT in x86 */
  1976. jmp_cond = IA32_JG;
  1977. break;
  1978. case BPF_JSLT:
  1979. /* Signed '<', LT in x86 */
  1980. jmp_cond = IA32_JL;
  1981. break;
  1982. case BPF_JSGE:
  1983. /* Signed '>=', GE in x86 */
  1984. jmp_cond = IA32_JGE;
  1985. break;
  1986. case BPF_JSLE:
  1987. /* Signed '<=', LE in x86 */
  1988. jmp_cond = IA32_JLE;
  1989. break;
  1990. default: /* to silence GCC warning */
  1991. return -EFAULT;
  1992. }
  1993. jmp_offset = addrs[i + insn->off] - addrs[i];
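/*
 * The IA32_J* values are the short (rel8) Jcc opcodes 0x70..0x7F; the
 * near (rel32) form uses the two-byte opcode 0x0F 0x80..0x8F, i.e. the
 * short opcode plus 0x10, hence jmp_cond + 0x10 below.
 */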
  1994. if (is_imm8(jmp_offset)) {
  1995. EMIT2(jmp_cond, jmp_offset);
  1996. } else if (is_simm32(jmp_offset)) {
  1997. EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
  1998. } else {
  1999. pr_err("cond_jmp gen bug %llx\n", jmp_offset);
  2000. return -EFAULT;
  2001. }
  2002. break;
  2003. }
  2004. case BPF_JMP | BPF_JA:
  2005. if (insn->off == -1)
  2006. /* -1 jmp instructions will always jump
  2007. * backwards two bytes. Explicitly handling
  2008. * this case avoids wasting too many passes
  2009. * when there are long sequences of replaced
  2010. * dead code.
  2011. */
  2012. jmp_offset = -2;
  2013. else
  2014. jmp_offset = addrs[i + insn->off] - addrs[i];
  2015. if (!jmp_offset)
  2016. /* Optimize out nop jumps */
  2017. break;
  2018. emit_jmp:
  2019. if (is_imm8(jmp_offset)) {
  2020. EMIT2(0xEB, jmp_offset);
  2021. } else if (is_simm32(jmp_offset)) {
  2022. EMIT1_off32(0xE9, jmp_offset);
  2023. } else {
  2024. pr_err("jmp gen bug %llx\n", jmp_offset);
  2025. return -EFAULT;
  2026. }
  2027. break;
  2028. /* STX XADD: lock *(u32 *)(dst + off) += src */
  2029. case BPF_STX | BPF_XADD | BPF_W:
  2030. /* STX XADD: lock *(u64 *)(dst + off) += src */
  2031. case BPF_STX | BPF_XADD | BPF_DW:
  2032. goto notyet;
  2033. case BPF_JMP | BPF_EXIT:
  2034. if (seen_exit) {
  2035. jmp_offset = ctx->cleanup_addr - addrs[i];
  2036. goto emit_jmp;
  2037. }
  2038. seen_exit = true;
  2039. /* Update cleanup_addr */
  2040. ctx->cleanup_addr = proglen;
  2041. emit_epilogue(&prog, bpf_prog->aux->stack_depth);
  2042. break;
  2043. notyet:
  2044. pr_info_once("*** NOT YET: opcode %02x ***\n", code);
  2045. return -EFAULT;
  2046. default:
  2047. /*
2048. * This error will be seen if a new instruction was added
2049. * to the interpreter but not to the JIT, or if there is
2050. * junk in bpf_prog.
  2051. */
  2052. pr_err("bpf_jit: unknown opcode %02x\n", code);
  2053. return -EINVAL;
  2054. }
  2055. ilen = prog - temp;
  2056. if (ilen > BPF_MAX_INSN_SIZE) {
  2057. pr_err("bpf_jit: fatal insn size error\n");
  2058. return -EFAULT;
  2059. }
  2060. if (image) {
  2061. if (unlikely(proglen + ilen > oldproglen)) {
  2062. pr_err("bpf_jit: fatal error\n");
  2063. return -EFAULT;
  2064. }
  2065. memcpy(image + proglen, temp, ilen);
  2066. }
  2067. proglen += ilen;
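/*
 * addrs[i] records the byte offset of the end of instruction i in the
 * image; jump offsets are computed from these entries, and the recorded
 * offsets are expected to only shrink (or stay the same) from pass to
 * pass until the image size converges.
 */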
  2068. addrs[i] = proglen;
  2069. prog = temp;
  2070. }
  2071. return proglen;
  2072. }
  2073. struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
  2074. {
  2075. struct bpf_binary_header *header = NULL;
  2076. struct bpf_prog *tmp, *orig_prog = prog;
  2077. int proglen, oldproglen = 0;
  2078. struct jit_context ctx = {};
  2079. bool tmp_blinded = false;
  2080. u8 *image = NULL;
  2081. int *addrs;
  2082. int pass;
  2083. int i;
  2084. if (!prog->jit_requested)
  2085. return orig_prog;
  2086. tmp = bpf_jit_blind_constants(prog);
  2087. /*
  2088. * If blinding was requested and we failed during blinding,
  2089. * we must fall back to the interpreter.
  2090. */
  2091. if (IS_ERR(tmp))
  2092. return orig_prog;
  2093. if (tmp != prog) {
  2094. tmp_blinded = true;
  2095. prog = tmp;
  2096. }
  2097. addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
  2098. if (!addrs) {
  2099. prog = orig_prog;
  2100. goto out;
  2101. }
  2102. /*
2103. * Before the first pass, make a rough estimate of addrs[]:
2104. * each BPF instruction is translated to less than 64 bytes.
  2105. */
  2106. for (proglen = 0, i = 0; i < prog->len; i++) {
  2107. proglen += 64;
  2108. addrs[i] = proglen;
  2109. }
  2110. ctx.cleanup_addr = proglen;
  2111. /*
  2112. * JITed image shrinks with every pass and the loop iterates
  2113. * until the image stops shrinking. Very large BPF programs
2114. * may converge on the last pass. In such a case, do one more
  2115. * pass to emit the final image.
  2116. */
  2117. for (pass = 0; pass < 20 || image; pass++) {
  2118. proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
  2119. if (proglen <= 0) {
  2120. out_image:
  2121. image = NULL;
  2122. if (header)
  2123. bpf_jit_binary_free(header);
  2124. prog = orig_prog;
  2125. goto out_addrs;
  2126. }
  2127. if (image) {
  2128. if (proglen != oldproglen) {
  2129. pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
  2130. proglen, oldproglen);
  2131. goto out_image;
  2132. }
  2133. break;
  2134. }
  2135. if (proglen == oldproglen) {
  2136. header = bpf_jit_binary_alloc(proglen, &image,
  2137. 1, jit_fill_hole);
  2138. if (!header) {
  2139. prog = orig_prog;
  2140. goto out_addrs;
  2141. }
  2142. }
  2143. oldproglen = proglen;
  2144. cond_resched();
  2145. }
  2146. if (bpf_jit_enable > 1)
  2147. bpf_jit_dump(prog->len, proglen, pass + 1, image);
  2148. if (image) {
  2149. bpf_jit_binary_lock_ro(header);
  2150. prog->bpf_func = (void *)image;
  2151. prog->jited = 1;
  2152. prog->jited_len = proglen;
  2153. } else {
  2154. prog = orig_prog;
  2155. }
  2156. out_addrs:
  2157. kfree(addrs);
  2158. out:
  2159. if (tmp_blinded)
  2160. bpf_jit_prog_release_other(prog, prog == orig_prog ?
  2161. tmp : orig_prog);
  2162. return prog;
  2163. }