bpf_load.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. #include <stdio.h>
  2. #include <sys/types.h>
  3. #include <sys/stat.h>
  4. #include <fcntl.h>
  5. #include <libelf.h>
  6. #include <gelf.h>
  7. #include <errno.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <stdbool.h>
  11. #include <stdlib.h>
  12. #include <linux/bpf.h>
  13. #include <linux/filter.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/netlink.h>
  16. #include <linux/rtnetlink.h>
  17. #include <linux/types.h>
  18. #include <sys/types.h>
  19. #include <sys/socket.h>
  20. #include <sys/syscall.h>
  21. #include <sys/ioctl.h>
  22. #include <sys/mman.h>
  23. #include <poll.h>
  24. #include <ctype.h>
  25. #include <assert.h>
  26. #include "libbpf.h"
  27. #include "bpf_load.h"
  28. #include "perf-sys.h"
  29. #define DEBUGFS "/sys/kernel/debug/tracing/"
  30. static char license[128];
  31. static int kern_version;
  32. static bool processed_sec[128];
  33. char bpf_log_buf[BPF_LOG_BUF_SIZE];
  34. int map_fd[MAX_MAPS];
  35. int prog_fd[MAX_PROGS];
  36. int event_fd[MAX_PROGS];
  37. int prog_cnt;
  38. int prog_array_fd = -1;
  39. struct bpf_map_data map_data[MAX_MAPS];
  40. int map_data_count = 0;
  41. static int populate_prog_array(const char *event, int prog_fd)
  42. {
  43. int ind = atoi(event), err;
  44. err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
  45. if (err < 0) {
  46. printf("failed to store prog_fd in prog_array\n");
  47. return -1;
  48. }
  49. return 0;
  50. }
  51. static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
  52. {
  53. bool is_socket = strncmp(event, "socket", 6) == 0;
  54. bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
  55. bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
  56. bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
  57. bool is_xdp = strncmp(event, "xdp", 3) == 0;
  58. bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
  59. bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
  60. bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
  61. bool is_sockops = strncmp(event, "sockops", 7) == 0;
  62. bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
  63. size_t insns_cnt = size / sizeof(struct bpf_insn);
  64. enum bpf_prog_type prog_type;
  65. char buf[256];
  66. int fd, efd, err, id;
  67. struct perf_event_attr attr = {};
  68. attr.type = PERF_TYPE_TRACEPOINT;
  69. attr.sample_type = PERF_SAMPLE_RAW;
  70. attr.sample_period = 1;
  71. attr.wakeup_events = 1;
  72. if (is_socket) {
  73. prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  74. } else if (is_kprobe || is_kretprobe) {
  75. prog_type = BPF_PROG_TYPE_KPROBE;
  76. } else if (is_tracepoint) {
  77. prog_type = BPF_PROG_TYPE_TRACEPOINT;
  78. } else if (is_xdp) {
  79. prog_type = BPF_PROG_TYPE_XDP;
  80. } else if (is_perf_event) {
  81. prog_type = BPF_PROG_TYPE_PERF_EVENT;
  82. } else if (is_cgroup_skb) {
  83. prog_type = BPF_PROG_TYPE_CGROUP_SKB;
  84. } else if (is_cgroup_sk) {
  85. prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
  86. } else if (is_sockops) {
  87. prog_type = BPF_PROG_TYPE_SOCK_OPS;
  88. } else if (is_sk_skb) {
  89. prog_type = BPF_PROG_TYPE_SK_SKB;
  90. } else {
  91. printf("Unknown event '%s'\n", event);
  92. return -1;
  93. }
  94. fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
  95. bpf_log_buf, BPF_LOG_BUF_SIZE);
  96. if (fd < 0) {
  97. printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
  98. return -1;
  99. }
  100. prog_fd[prog_cnt++] = fd;
  101. if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
  102. return 0;
  103. if (is_socket || is_sockops || is_sk_skb) {
  104. if (is_socket)
  105. event += 6;
  106. else
  107. event += 7;
  108. if (*event != '/')
  109. return 0;
  110. event++;
  111. if (!isdigit(*event)) {
  112. printf("invalid prog number\n");
  113. return -1;
  114. }
  115. return populate_prog_array(event, fd);
  116. }
  117. if (is_kprobe || is_kretprobe) {
  118. if (is_kprobe)
  119. event += 7;
  120. else
  121. event += 10;
  122. if (*event == 0) {
  123. printf("event name cannot be empty\n");
  124. return -1;
  125. }
  126. if (isdigit(*event))
  127. return populate_prog_array(event, fd);
  128. snprintf(buf, sizeof(buf),
  129. "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
  130. is_kprobe ? 'p' : 'r', event, event);
  131. err = system(buf);
  132. if (err < 0) {
  133. printf("failed to create kprobe '%s' error '%s'\n",
  134. event, strerror(errno));
  135. return -1;
  136. }
  137. strcpy(buf, DEBUGFS);
  138. strcat(buf, "events/kprobes/");
  139. strcat(buf, event);
  140. strcat(buf, "/id");
  141. } else if (is_tracepoint) {
  142. event += 11;
  143. if (*event == 0) {
  144. printf("event name cannot be empty\n");
  145. return -1;
  146. }
  147. strcpy(buf, DEBUGFS);
  148. strcat(buf, "events/");
  149. strcat(buf, event);
  150. strcat(buf, "/id");
  151. }
  152. efd = open(buf, O_RDONLY, 0);
  153. if (efd < 0) {
  154. printf("failed to open event %s\n", event);
  155. return -1;
  156. }
  157. err = read(efd, buf, sizeof(buf));
  158. if (err < 0 || err >= sizeof(buf)) {
  159. printf("read from '%s' failed '%s'\n", event, strerror(errno));
  160. return -1;
  161. }
  162. close(efd);
  163. buf[err] = 0;
  164. id = atoi(buf);
  165. attr.config = id;
  166. efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
  167. if (efd < 0) {
  168. printf("event %d fd %d err %s\n", id, efd, strerror(errno));
  169. return -1;
  170. }
  171. event_fd[prog_cnt - 1] = efd;
  172. ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
  173. ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
  174. return 0;
  175. }
  176. static int load_maps(struct bpf_map_data *maps, int nr_maps,
  177. fixup_map_cb fixup_map)
  178. {
  179. int i, numa_node;
  180. for (i = 0; i < nr_maps; i++) {
  181. if (fixup_map) {
  182. fixup_map(&maps[i], i);
  183. /* Allow userspace to assign map FD prior to creation */
  184. if (maps[i].fd != -1) {
  185. map_fd[i] = maps[i].fd;
  186. continue;
  187. }
  188. }
  189. numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ?
  190. maps[i].def.numa_node : -1;
  191. if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
  192. maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
  193. int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
  194. map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
  195. maps[i].def.key_size,
  196. inner_map_fd,
  197. maps[i].def.max_entries,
  198. maps[i].def.map_flags,
  199. numa_node);
  200. } else {
  201. map_fd[i] = bpf_create_map_node(maps[i].def.type,
  202. maps[i].def.key_size,
  203. maps[i].def.value_size,
  204. maps[i].def.max_entries,
  205. maps[i].def.map_flags,
  206. numa_node);
  207. }
  208. if (map_fd[i] < 0) {
  209. printf("failed to create a map: %d %s\n",
  210. errno, strerror(errno));
  211. return 1;
  212. }
  213. maps[i].fd = map_fd[i];
  214. if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
  215. prog_array_fd = map_fd[i];
  216. }
  217. return 0;
  218. }
  219. static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
  220. GElf_Shdr *shdr, Elf_Data **data)
  221. {
  222. Elf_Scn *scn;
  223. scn = elf_getscn(elf, i);
  224. if (!scn)
  225. return 1;
  226. if (gelf_getshdr(scn, shdr) != shdr)
  227. return 2;
  228. *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
  229. if (!*shname || !shdr->sh_size)
  230. return 3;
  231. *data = elf_getdata(scn, 0);
  232. if (!*data || elf_getdata(scn, *data) != NULL)
  233. return 4;
  234. return 0;
  235. }
  236. static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
  237. GElf_Shdr *shdr, struct bpf_insn *insn,
  238. struct bpf_map_data *maps, int nr_maps)
  239. {
  240. int i, nrels;
  241. nrels = shdr->sh_size / shdr->sh_entsize;
  242. for (i = 0; i < nrels; i++) {
  243. GElf_Sym sym;
  244. GElf_Rel rel;
  245. unsigned int insn_idx;
  246. bool match = false;
  247. int j, map_idx;
  248. gelf_getrel(data, i, &rel);
  249. insn_idx = rel.r_offset / sizeof(struct bpf_insn);
  250. gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
  251. if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
  252. printf("invalid relo for insn[%d].code 0x%x\n",
  253. insn_idx, insn[insn_idx].code);
  254. return 1;
  255. }
  256. insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
  257. /* Match FD relocation against recorded map_data[] offset */
  258. for (map_idx = 0; map_idx < nr_maps; map_idx++) {
  259. if (maps[map_idx].elf_offset == sym.st_value) {
  260. match = true;
  261. break;
  262. }
  263. }
  264. if (match) {
  265. insn[insn_idx].imm = maps[map_idx].fd;
  266. } else {
  267. printf("invalid relo for insn[%d] no map_data match\n",
  268. insn_idx);
  269. return 1;
  270. }
  271. }
  272. return 0;
  273. }
  274. static int cmp_symbols(const void *l, const void *r)
  275. {
  276. const GElf_Sym *lsym = (const GElf_Sym *)l;
  277. const GElf_Sym *rsym = (const GElf_Sym *)r;
  278. if (lsym->st_value < rsym->st_value)
  279. return -1;
  280. else if (lsym->st_value > rsym->st_value)
  281. return 1;
  282. else
  283. return 0;
  284. }
  285. static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
  286. Elf *elf, Elf_Data *symbols, int strtabidx)
  287. {
  288. int map_sz_elf, map_sz_copy;
  289. bool validate_zero = false;
  290. Elf_Data *data_maps;
  291. int i, nr_maps;
  292. GElf_Sym *sym;
  293. Elf_Scn *scn;
  294. int copy_sz;
  295. if (maps_shndx < 0)
  296. return -EINVAL;
  297. if (!symbols)
  298. return -EINVAL;
  299. /* Get data for maps section via elf index */
  300. scn = elf_getscn(elf, maps_shndx);
  301. if (scn)
  302. data_maps = elf_getdata(scn, NULL);
  303. if (!scn || !data_maps) {
  304. printf("Failed to get Elf_Data from maps section %d\n",
  305. maps_shndx);
  306. return -EINVAL;
  307. }
  308. /* For each map get corrosponding symbol table entry */
  309. sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
  310. for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
  311. assert(nr_maps < MAX_MAPS+1);
  312. if (!gelf_getsym(symbols, i, &sym[nr_maps]))
  313. continue;
  314. if (sym[nr_maps].st_shndx != maps_shndx)
  315. continue;
  316. /* Only increment iif maps section */
  317. nr_maps++;
  318. }
  319. /* Align to map_fd[] order, via sort on offset in sym.st_value */
  320. qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
  321. /* Keeping compatible with ELF maps section changes
  322. * ------------------------------------------------
  323. * The program size of struct bpf_map_def is known by loader
  324. * code, but struct stored in ELF file can be different.
  325. *
  326. * Unfortunately sym[i].st_size is zero. To calculate the
  327. * struct size stored in the ELF file, assume all struct have
  328. * the same size, and simply divide with number of map
  329. * symbols.
  330. */
  331. map_sz_elf = data_maps->d_size / nr_maps;
  332. map_sz_copy = sizeof(struct bpf_map_def);
  333. if (map_sz_elf < map_sz_copy) {
  334. /*
  335. * Backward compat, loading older ELF file with
  336. * smaller struct, keeping remaining bytes zero.
  337. */
  338. map_sz_copy = map_sz_elf;
  339. } else if (map_sz_elf > map_sz_copy) {
  340. /*
  341. * Forward compat, loading newer ELF file with larger
  342. * struct with unknown features. Assume zero means
  343. * feature not used. Thus, validate rest of struct
  344. * data is zero.
  345. */
  346. validate_zero = true;
  347. }
  348. /* Memcpy relevant part of ELF maps data to loader maps */
  349. for (i = 0; i < nr_maps; i++) {
  350. unsigned char *addr, *end;
  351. struct bpf_map_def *def;
  352. const char *map_name;
  353. size_t offset;
  354. map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
  355. maps[i].name = strdup(map_name);
  356. if (!maps[i].name) {
  357. printf("strdup(%s): %s(%d)\n", map_name,
  358. strerror(errno), errno);
  359. free(sym);
  360. return -errno;
  361. }
  362. /* Symbol value is offset into ELF maps section data area */
  363. offset = sym[i].st_value;
  364. def = (struct bpf_map_def *)(data_maps->d_buf + offset);
  365. maps[i].elf_offset = offset;
  366. memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
  367. memcpy(&maps[i].def, def, map_sz_copy);
  368. /* Verify no newer features were requested */
  369. if (validate_zero) {
  370. addr = (unsigned char*) def + map_sz_copy;
  371. end = (unsigned char*) def + map_sz_elf;
  372. for (; addr < end; addr++) {
  373. if (*addr != 0) {
  374. free(sym);
  375. return -EFBIG;
  376. }
  377. }
  378. }
  379. }
  380. free(sym);
  381. return nr_maps;
  382. }
  383. static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
  384. {
  385. int fd, i, ret, maps_shndx = -1, strtabidx = -1;
  386. Elf *elf;
  387. GElf_Ehdr ehdr;
  388. GElf_Shdr shdr, shdr_prog;
  389. Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
  390. char *shname, *shname_prog;
  391. int nr_maps = 0;
  392. /* reset global variables */
  393. kern_version = 0;
  394. memset(license, 0, sizeof(license));
  395. memset(processed_sec, 0, sizeof(processed_sec));
  396. if (elf_version(EV_CURRENT) == EV_NONE)
  397. return 1;
  398. fd = open(path, O_RDONLY, 0);
  399. if (fd < 0)
  400. return 1;
  401. elf = elf_begin(fd, ELF_C_READ, NULL);
  402. if (!elf)
  403. return 1;
  404. if (gelf_getehdr(elf, &ehdr) != &ehdr)
  405. return 1;
  406. /* clear all kprobes */
  407. i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
  408. /* scan over all elf sections to get license and map info */
  409. for (i = 1; i < ehdr.e_shnum; i++) {
  410. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  411. continue;
  412. if (0) /* helpful for llvm debugging */
  413. printf("section %d:%s data %p size %zd link %d flags %d\n",
  414. i, shname, data->d_buf, data->d_size,
  415. shdr.sh_link, (int) shdr.sh_flags);
  416. if (strcmp(shname, "license") == 0) {
  417. processed_sec[i] = true;
  418. memcpy(license, data->d_buf, data->d_size);
  419. } else if (strcmp(shname, "version") == 0) {
  420. processed_sec[i] = true;
  421. if (data->d_size != sizeof(int)) {
  422. printf("invalid size of version section %zd\n",
  423. data->d_size);
  424. return 1;
  425. }
  426. memcpy(&kern_version, data->d_buf, sizeof(int));
  427. } else if (strcmp(shname, "maps") == 0) {
  428. int j;
  429. maps_shndx = i;
  430. data_maps = data;
  431. for (j = 0; j < MAX_MAPS; j++)
  432. map_data[j].fd = -1;
  433. } else if (shdr.sh_type == SHT_SYMTAB) {
  434. strtabidx = shdr.sh_link;
  435. symbols = data;
  436. }
  437. }
  438. ret = 1;
  439. if (!symbols) {
  440. printf("missing SHT_SYMTAB section\n");
  441. goto done;
  442. }
  443. if (data_maps) {
  444. nr_maps = load_elf_maps_section(map_data, maps_shndx,
  445. elf, symbols, strtabidx);
  446. if (nr_maps < 0) {
  447. printf("Error: Failed loading ELF maps (errno:%d):%s\n",
  448. nr_maps, strerror(-nr_maps));
  449. ret = 1;
  450. goto done;
  451. }
  452. if (load_maps(map_data, nr_maps, fixup_map))
  453. goto done;
  454. map_data_count = nr_maps;
  455. processed_sec[maps_shndx] = true;
  456. }
  457. /* process all relo sections, and rewrite bpf insns for maps */
  458. for (i = 1; i < ehdr.e_shnum; i++) {
  459. if (processed_sec[i])
  460. continue;
  461. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  462. continue;
  463. if (shdr.sh_type == SHT_REL) {
  464. struct bpf_insn *insns;
  465. /* locate prog sec that need map fixup (relocations) */
  466. if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
  467. &shdr_prog, &data_prog))
  468. continue;
  469. if (shdr_prog.sh_type != SHT_PROGBITS ||
  470. !(shdr_prog.sh_flags & SHF_EXECINSTR))
  471. continue;
  472. insns = (struct bpf_insn *) data_prog->d_buf;
  473. processed_sec[i] = true; /* relo section */
  474. if (parse_relo_and_apply(data, symbols, &shdr, insns,
  475. map_data, nr_maps))
  476. continue;
  477. }
  478. }
  479. /* load programs */
  480. for (i = 1; i < ehdr.e_shnum; i++) {
  481. if (processed_sec[i])
  482. continue;
  483. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  484. continue;
  485. if (memcmp(shname, "kprobe/", 7) == 0 ||
  486. memcmp(shname, "kretprobe/", 10) == 0 ||
  487. memcmp(shname, "tracepoint/", 11) == 0 ||
  488. memcmp(shname, "xdp", 3) == 0 ||
  489. memcmp(shname, "perf_event", 10) == 0 ||
  490. memcmp(shname, "socket", 6) == 0 ||
  491. memcmp(shname, "cgroup/", 7) == 0 ||
  492. memcmp(shname, "sockops", 7) == 0 ||
  493. memcmp(shname, "sk_skb", 6) == 0) {
  494. ret = load_and_attach(shname, data->d_buf,
  495. data->d_size);
  496. if (ret != 0)
  497. goto done;
  498. }
  499. }
  500. ret = 0;
  501. done:
  502. close(fd);
  503. return ret;
  504. }
  505. int load_bpf_file(char *path)
  506. {
  507. return do_load_bpf_file(path, NULL);
  508. }
  509. int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
  510. {
  511. return do_load_bpf_file(path, fixup_map);
  512. }
  513. void read_trace_pipe(void)
  514. {
  515. int trace_fd;
  516. trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
  517. if (trace_fd < 0)
  518. return;
  519. while (1) {
  520. static char buf[4096];
  521. ssize_t sz;
  522. sz = read(trace_fd, buf, sizeof(buf));
  523. if (sz > 0) {
  524. buf[sz] = 0;
  525. puts(buf);
  526. }
  527. }
  528. }
  529. #define MAX_SYMS 300000
  530. static struct ksym syms[MAX_SYMS];
  531. static int sym_cnt;
  532. static int ksym_cmp(const void *p1, const void *p2)
  533. {
  534. return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
  535. }
  536. int load_kallsyms(void)
  537. {
  538. FILE *f = fopen("/proc/kallsyms", "r");
  539. char func[256], buf[256];
  540. char symbol;
  541. void *addr;
  542. int i = 0;
  543. if (!f)
  544. return -ENOENT;
  545. while (!feof(f)) {
  546. if (!fgets(buf, sizeof(buf), f))
  547. break;
  548. if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
  549. break;
  550. if (!addr)
  551. continue;
  552. syms[i].addr = (long) addr;
  553. syms[i].name = strdup(func);
  554. i++;
  555. }
  556. sym_cnt = i;
  557. qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
  558. return 0;
  559. }
  560. struct ksym *ksym_search(long key)
  561. {
  562. int start = 0, end = sym_cnt;
  563. int result;
  564. while (start < end) {
  565. size_t mid = start + (end - start) / 2;
  566. result = key - syms[mid].addr;
  567. if (result < 0)
  568. end = mid;
  569. else if (result > 0)
  570. start = mid + 1;
  571. else
  572. return &syms[mid];
  573. }
  574. if (start >= 1 && syms[start - 1].addr < key &&
  575. key < syms[start].addr)
  576. /* valid ksym */
  577. return &syms[start - 1];
  578. /* out of range. return _stext */
  579. return &syms[0];
  580. }
  /*
   * set_link_xdp_fd - attach an XDP program to a network interface
   * @ifindex: index of the interface
   * @fd:      descriptor placed in the IFLA_XDP_FD attribute
   * @flags:   XDP flags; sent as IFLA_XDP_FLAGS only when non-zero
   *
   * Sends an RTM_SETLINK request with a nested IFLA_XDP attribute over a
   * NETLINK_ROUTE socket and waits for the acknowledgement.
   *
   * Returns 0 on success and -1 on failure.
   */
  int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
  {
      struct sockaddr_nl sa;
      socklen_t addrlen;
      int sock, seq = 0, len, ret = -1;
      char buf[4096];
      struct nlattr *nla, *nla_xdp;
      struct {
          struct nlmsghdr nh;
          struct ifinfomsg ifinfo;
          char attrbuf[64];
      } req;
      struct nlmsghdr *nh;
      struct nlmsgerr *err;

      memset(&sa, 0, sizeof(sa));
      sa.nl_family = AF_NETLINK;

      sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
      if (sock < 0) {
          printf("open netlink socket: %s\n", strerror(errno));
          return -1;
      }

      if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
          printf("bind to netlink: %s\n", strerror(errno));
          goto cleanup;
      }

      /*
       * Replies carry the port id the kernel gave this socket.  It is not
       * always the process id, so read it back instead of using getpid().
       */
      addrlen = sizeof(sa);
      if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
          printf("getsockname on netlink: %s\n", strerror(errno));
          goto cleanup;
      }
      if (addrlen != sizeof(sa)) {
          printf("unexpected netlink address length %u\n",
                 (unsigned int)addrlen);
          goto cleanup;
      }

      memset(&req, 0, sizeof(req));
      req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
      req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
      req.nh.nlmsg_type = RTM_SETLINK;
      req.nh.nlmsg_pid = 0;
      req.nh.nlmsg_seq = ++seq;
      req.ifinfo.ifi_family = AF_UNSPEC;
      req.ifinfo.ifi_index = ifindex;

      /* started nested attribute for XDP */
      nla = (struct nlattr *)(((char *)&req)
                              + NLMSG_ALIGN(req.nh.nlmsg_len));
      nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
      nla->nla_len = NLA_HDRLEN;

      /* add XDP fd */
      nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
      nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
      nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
      memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
      nla->nla_len += nla_xdp->nla_len;

      /* if user passed in any flags, add those too */
      if (flags) {
          nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
          nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
          nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
          memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
          nla->nla_len += nla_xdp->nla_len;
      }

      req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);

      if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
          printf("send to netlink: %s\n", strerror(errno));
          goto cleanup;
      }

      len = recv(sock, buf, sizeof(buf), 0);
      if (len < 0) {
          printf("recv from netlink: %s\n", strerror(errno));
          goto cleanup;
      }

      for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
           nh = NLMSG_NEXT(nh, len)) {
          if (nh->nlmsg_pid != sa.nl_pid) {
              printf("Wrong pid %d, expected %d\n",
                     (int)nh->nlmsg_pid, (int)sa.nl_pid);
              goto cleanup;
          }
          if (nh->nlmsg_seq != (__u32)seq) {
              printf("Wrong seq %d, expected %d\n",
                     (int)nh->nlmsg_seq, seq);
              goto cleanup;
          }
          switch (nh->nlmsg_type) {
          case NLMSG_ERROR:
              err = (struct nlmsgerr *)NLMSG_DATA(nh);
              /* An error code of zero is the acknowledgement. */
              if (!err->error)
                  continue;
              printf("nlmsg error %s\n", strerror(-err->error));
              goto cleanup;
          case NLMSG_DONE:
              break;
          default:
              break;
          }
      }

      ret = 0;

  cleanup:
      close(sock);
      return ret;
  }