/* bpf_load.c - eBPF ELF object loader helpers (Linux samples/bpf) */
  1. #include <stdio.h>
  2. #include <sys/types.h>
  3. #include <sys/stat.h>
  4. #include <fcntl.h>
  5. #include <libelf.h>
  6. #include <gelf.h>
  7. #include <errno.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <stdbool.h>
  11. #include <stdlib.h>
  12. #include <linux/bpf.h>
  13. #include <linux/filter.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/netlink.h>
  16. #include <linux/rtnetlink.h>
  17. #include <linux/types.h>
  18. #include <sys/types.h>
  19. #include <sys/socket.h>
  20. #include <sys/syscall.h>
  21. #include <sys/ioctl.h>
  22. #include <sys/mman.h>
  23. #include <poll.h>
  24. #include <ctype.h>
  25. #include <assert.h>
  26. #include "libbpf.h"
  27. #include "bpf_load.h"
  28. #include "perf-sys.h"
  29. #define DEBUGFS "/sys/kernel/debug/tracing/"
  30. static char license[128];
  31. static int kern_version;
  32. static bool processed_sec[128];
  33. char bpf_log_buf[BPF_LOG_BUF_SIZE];
  34. int map_fd[MAX_MAPS];
  35. int prog_fd[MAX_PROGS];
  36. int event_fd[MAX_PROGS];
  37. int prog_cnt;
  38. int prog_array_fd = -1;
  39. struct bpf_map_data map_data[MAX_MAPS];
  40. int map_data_count = 0;
  41. static int populate_prog_array(const char *event, int prog_fd)
  42. {
  43. int ind = atoi(event), err;
  44. err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
  45. if (err < 0) {
  46. printf("failed to store prog_fd in prog_array\n");
  47. return -1;
  48. }
  49. return 0;
  50. }
  51. static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
  52. {
  53. bool is_socket = strncmp(event, "socket", 6) == 0;
  54. bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
  55. bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
  56. bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
  57. bool is_xdp = strncmp(event, "xdp", 3) == 0;
  58. bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
  59. bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
  60. bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
  61. bool is_sockops = strncmp(event, "sockops", 7) == 0;
  62. bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0;
  63. size_t insns_cnt = size / sizeof(struct bpf_insn);
  64. enum bpf_prog_type prog_type;
  65. char buf[256];
  66. int fd, efd, err, id;
  67. struct perf_event_attr attr = {};
  68. attr.type = PERF_TYPE_TRACEPOINT;
  69. attr.sample_type = PERF_SAMPLE_RAW;
  70. attr.sample_period = 1;
  71. attr.wakeup_events = 1;
  72. if (is_socket) {
  73. prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  74. } else if (is_kprobe || is_kretprobe) {
  75. prog_type = BPF_PROG_TYPE_KPROBE;
  76. } else if (is_tracepoint) {
  77. prog_type = BPF_PROG_TYPE_TRACEPOINT;
  78. } else if (is_xdp) {
  79. prog_type = BPF_PROG_TYPE_XDP;
  80. } else if (is_perf_event) {
  81. prog_type = BPF_PROG_TYPE_PERF_EVENT;
  82. } else if (is_cgroup_skb) {
  83. prog_type = BPF_PROG_TYPE_CGROUP_SKB;
  84. } else if (is_cgroup_sk) {
  85. prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
  86. } else if (is_sockops) {
  87. prog_type = BPF_PROG_TYPE_SOCK_OPS;
  88. } else if (is_sk_skb) {
  89. prog_type = BPF_PROG_TYPE_SK_SKB;
  90. } else {
  91. printf("Unknown event '%s'\n", event);
  92. return -1;
  93. }
  94. fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
  95. bpf_log_buf, BPF_LOG_BUF_SIZE);
  96. if (fd < 0) {
  97. printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
  98. return -1;
  99. }
  100. prog_fd[prog_cnt++] = fd;
  101. if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
  102. return 0;
  103. if (is_socket || is_sockops || is_sk_skb) {
  104. if (is_socket)
  105. event += 6;
  106. else
  107. event += 7;
  108. if (*event != '/')
  109. return 0;
  110. event++;
  111. if (!isdigit(*event)) {
  112. printf("invalid prog number\n");
  113. return -1;
  114. }
  115. return populate_prog_array(event, fd);
  116. }
  117. if (is_kprobe || is_kretprobe) {
  118. if (is_kprobe)
  119. event += 7;
  120. else
  121. event += 10;
  122. if (*event == 0) {
  123. printf("event name cannot be empty\n");
  124. return -1;
  125. }
  126. if (isdigit(*event))
  127. return populate_prog_array(event, fd);
  128. snprintf(buf, sizeof(buf),
  129. "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
  130. is_kprobe ? 'p' : 'r', event, event);
  131. err = system(buf);
  132. if (err < 0) {
  133. printf("failed to create kprobe '%s' error '%s'\n",
  134. event, strerror(errno));
  135. return -1;
  136. }
  137. strcpy(buf, DEBUGFS);
  138. strcat(buf, "events/kprobes/");
  139. strcat(buf, event);
  140. strcat(buf, "/id");
  141. } else if (is_tracepoint) {
  142. event += 11;
  143. if (*event == 0) {
  144. printf("event name cannot be empty\n");
  145. return -1;
  146. }
  147. strcpy(buf, DEBUGFS);
  148. strcat(buf, "events/");
  149. strcat(buf, event);
  150. strcat(buf, "/id");
  151. }
  152. efd = open(buf, O_RDONLY, 0);
  153. if (efd < 0) {
  154. printf("failed to open event %s\n", event);
  155. return -1;
  156. }
  157. err = read(efd, buf, sizeof(buf));
  158. if (err < 0 || err >= sizeof(buf)) {
  159. printf("read from '%s' failed '%s'\n", event, strerror(errno));
  160. return -1;
  161. }
  162. close(efd);
  163. buf[err] = 0;
  164. id = atoi(buf);
  165. attr.config = id;
  166. efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
  167. if (efd < 0) {
  168. printf("event %d fd %d err %s\n", id, efd, strerror(errno));
  169. return -1;
  170. }
  171. event_fd[prog_cnt - 1] = efd;
  172. ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
  173. ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
  174. return 0;
  175. }
  176. static int load_maps(struct bpf_map_data *maps, int nr_maps,
  177. fixup_map_cb fixup_map)
  178. {
  179. int i, numa_node;
  180. for (i = 0; i < nr_maps; i++) {
  181. if (fixup_map) {
  182. fixup_map(&maps[i], i);
  183. /* Allow userspace to assign map FD prior to creation */
  184. if (maps[i].fd != -1) {
  185. map_fd[i] = maps[i].fd;
  186. continue;
  187. }
  188. }
  189. numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ?
  190. maps[i].def.numa_node : -1;
  191. if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
  192. maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
  193. int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
  194. map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
  195. maps[i].name,
  196. maps[i].def.key_size,
  197. inner_map_fd,
  198. maps[i].def.max_entries,
  199. maps[i].def.map_flags,
  200. numa_node);
  201. } else {
  202. map_fd[i] = bpf_create_map_node(maps[i].def.type,
  203. maps[i].name,
  204. maps[i].def.key_size,
  205. maps[i].def.value_size,
  206. maps[i].def.max_entries,
  207. maps[i].def.map_flags,
  208. numa_node);
  209. }
  210. if (map_fd[i] < 0) {
  211. printf("failed to create a map: %d %s\n",
  212. errno, strerror(errno));
  213. return 1;
  214. }
  215. maps[i].fd = map_fd[i];
  216. if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
  217. prog_array_fd = map_fd[i];
  218. }
  219. return 0;
  220. }
  221. static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
  222. GElf_Shdr *shdr, Elf_Data **data)
  223. {
  224. Elf_Scn *scn;
  225. scn = elf_getscn(elf, i);
  226. if (!scn)
  227. return 1;
  228. if (gelf_getshdr(scn, shdr) != shdr)
  229. return 2;
  230. *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
  231. if (!*shname || !shdr->sh_size)
  232. return 3;
  233. *data = elf_getdata(scn, 0);
  234. if (!*data || elf_getdata(scn, *data) != NULL)
  235. return 4;
  236. return 0;
  237. }
  238. static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
  239. GElf_Shdr *shdr, struct bpf_insn *insn,
  240. struct bpf_map_data *maps, int nr_maps)
  241. {
  242. int i, nrels;
  243. nrels = shdr->sh_size / shdr->sh_entsize;
  244. for (i = 0; i < nrels; i++) {
  245. GElf_Sym sym;
  246. GElf_Rel rel;
  247. unsigned int insn_idx;
  248. bool match = false;
  249. int j, map_idx;
  250. gelf_getrel(data, i, &rel);
  251. insn_idx = rel.r_offset / sizeof(struct bpf_insn);
  252. gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
  253. if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
  254. printf("invalid relo for insn[%d].code 0x%x\n",
  255. insn_idx, insn[insn_idx].code);
  256. return 1;
  257. }
  258. insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
  259. /* Match FD relocation against recorded map_data[] offset */
  260. for (map_idx = 0; map_idx < nr_maps; map_idx++) {
  261. if (maps[map_idx].elf_offset == sym.st_value) {
  262. match = true;
  263. break;
  264. }
  265. }
  266. if (match) {
  267. insn[insn_idx].imm = maps[map_idx].fd;
  268. } else {
  269. printf("invalid relo for insn[%d] no map_data match\n",
  270. insn_idx);
  271. return 1;
  272. }
  273. }
  274. return 0;
  275. }
  276. static int cmp_symbols(const void *l, const void *r)
  277. {
  278. const GElf_Sym *lsym = (const GElf_Sym *)l;
  279. const GElf_Sym *rsym = (const GElf_Sym *)r;
  280. if (lsym->st_value < rsym->st_value)
  281. return -1;
  282. else if (lsym->st_value > rsym->st_value)
  283. return 1;
  284. else
  285. return 0;
  286. }
  287. static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
  288. Elf *elf, Elf_Data *symbols, int strtabidx)
  289. {
  290. int map_sz_elf, map_sz_copy;
  291. bool validate_zero = false;
  292. Elf_Data *data_maps;
  293. int i, nr_maps;
  294. GElf_Sym *sym;
  295. Elf_Scn *scn;
  296. int copy_sz;
  297. if (maps_shndx < 0)
  298. return -EINVAL;
  299. if (!symbols)
  300. return -EINVAL;
  301. /* Get data for maps section via elf index */
  302. scn = elf_getscn(elf, maps_shndx);
  303. if (scn)
  304. data_maps = elf_getdata(scn, NULL);
  305. if (!scn || !data_maps) {
  306. printf("Failed to get Elf_Data from maps section %d\n",
  307. maps_shndx);
  308. return -EINVAL;
  309. }
  310. /* For each map get corrosponding symbol table entry */
  311. sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
  312. for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
  313. assert(nr_maps < MAX_MAPS+1);
  314. if (!gelf_getsym(symbols, i, &sym[nr_maps]))
  315. continue;
  316. if (sym[nr_maps].st_shndx != maps_shndx)
  317. continue;
  318. /* Only increment iif maps section */
  319. nr_maps++;
  320. }
  321. /* Align to map_fd[] order, via sort on offset in sym.st_value */
  322. qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
  323. /* Keeping compatible with ELF maps section changes
  324. * ------------------------------------------------
  325. * The program size of struct bpf_map_def is known by loader
  326. * code, but struct stored in ELF file can be different.
  327. *
  328. * Unfortunately sym[i].st_size is zero. To calculate the
  329. * struct size stored in the ELF file, assume all struct have
  330. * the same size, and simply divide with number of map
  331. * symbols.
  332. */
  333. map_sz_elf = data_maps->d_size / nr_maps;
  334. map_sz_copy = sizeof(struct bpf_map_def);
  335. if (map_sz_elf < map_sz_copy) {
  336. /*
  337. * Backward compat, loading older ELF file with
  338. * smaller struct, keeping remaining bytes zero.
  339. */
  340. map_sz_copy = map_sz_elf;
  341. } else if (map_sz_elf > map_sz_copy) {
  342. /*
  343. * Forward compat, loading newer ELF file with larger
  344. * struct with unknown features. Assume zero means
  345. * feature not used. Thus, validate rest of struct
  346. * data is zero.
  347. */
  348. validate_zero = true;
  349. }
  350. /* Memcpy relevant part of ELF maps data to loader maps */
  351. for (i = 0; i < nr_maps; i++) {
  352. unsigned char *addr, *end;
  353. struct bpf_map_def *def;
  354. const char *map_name;
  355. size_t offset;
  356. map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
  357. maps[i].name = strdup(map_name);
  358. if (!maps[i].name) {
  359. printf("strdup(%s): %s(%d)\n", map_name,
  360. strerror(errno), errno);
  361. free(sym);
  362. return -errno;
  363. }
  364. /* Symbol value is offset into ELF maps section data area */
  365. offset = sym[i].st_value;
  366. def = (struct bpf_map_def *)(data_maps->d_buf + offset);
  367. maps[i].elf_offset = offset;
  368. memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
  369. memcpy(&maps[i].def, def, map_sz_copy);
  370. /* Verify no newer features were requested */
  371. if (validate_zero) {
  372. addr = (unsigned char*) def + map_sz_copy;
  373. end = (unsigned char*) def + map_sz_elf;
  374. for (; addr < end; addr++) {
  375. if (*addr != 0) {
  376. free(sym);
  377. return -EFBIG;
  378. }
  379. }
  380. }
  381. }
  382. free(sym);
  383. return nr_maps;
  384. }
  385. static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
  386. {
  387. int fd, i, ret, maps_shndx = -1, strtabidx = -1;
  388. Elf *elf;
  389. GElf_Ehdr ehdr;
  390. GElf_Shdr shdr, shdr_prog;
  391. Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
  392. char *shname, *shname_prog;
  393. int nr_maps = 0;
  394. /* reset global variables */
  395. kern_version = 0;
  396. memset(license, 0, sizeof(license));
  397. memset(processed_sec, 0, sizeof(processed_sec));
  398. if (elf_version(EV_CURRENT) == EV_NONE)
  399. return 1;
  400. fd = open(path, O_RDONLY, 0);
  401. if (fd < 0)
  402. return 1;
  403. elf = elf_begin(fd, ELF_C_READ, NULL);
  404. if (!elf)
  405. return 1;
  406. if (gelf_getehdr(elf, &ehdr) != &ehdr)
  407. return 1;
  408. /* clear all kprobes */
  409. i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
  410. /* scan over all elf sections to get license and map info */
  411. for (i = 1; i < ehdr.e_shnum; i++) {
  412. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  413. continue;
  414. if (0) /* helpful for llvm debugging */
  415. printf("section %d:%s data %p size %zd link %d flags %d\n",
  416. i, shname, data->d_buf, data->d_size,
  417. shdr.sh_link, (int) shdr.sh_flags);
  418. if (strcmp(shname, "license") == 0) {
  419. processed_sec[i] = true;
  420. memcpy(license, data->d_buf, data->d_size);
  421. } else if (strcmp(shname, "version") == 0) {
  422. processed_sec[i] = true;
  423. if (data->d_size != sizeof(int)) {
  424. printf("invalid size of version section %zd\n",
  425. data->d_size);
  426. return 1;
  427. }
  428. memcpy(&kern_version, data->d_buf, sizeof(int));
  429. } else if (strcmp(shname, "maps") == 0) {
  430. int j;
  431. maps_shndx = i;
  432. data_maps = data;
  433. for (j = 0; j < MAX_MAPS; j++)
  434. map_data[j].fd = -1;
  435. } else if (shdr.sh_type == SHT_SYMTAB) {
  436. strtabidx = shdr.sh_link;
  437. symbols = data;
  438. }
  439. }
  440. ret = 1;
  441. if (!symbols) {
  442. printf("missing SHT_SYMTAB section\n");
  443. goto done;
  444. }
  445. if (data_maps) {
  446. nr_maps = load_elf_maps_section(map_data, maps_shndx,
  447. elf, symbols, strtabidx);
  448. if (nr_maps < 0) {
  449. printf("Error: Failed loading ELF maps (errno:%d):%s\n",
  450. nr_maps, strerror(-nr_maps));
  451. ret = 1;
  452. goto done;
  453. }
  454. if (load_maps(map_data, nr_maps, fixup_map))
  455. goto done;
  456. map_data_count = nr_maps;
  457. processed_sec[maps_shndx] = true;
  458. }
  459. /* process all relo sections, and rewrite bpf insns for maps */
  460. for (i = 1; i < ehdr.e_shnum; i++) {
  461. if (processed_sec[i])
  462. continue;
  463. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  464. continue;
  465. if (shdr.sh_type == SHT_REL) {
  466. struct bpf_insn *insns;
  467. /* locate prog sec that need map fixup (relocations) */
  468. if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
  469. &shdr_prog, &data_prog))
  470. continue;
  471. if (shdr_prog.sh_type != SHT_PROGBITS ||
  472. !(shdr_prog.sh_flags & SHF_EXECINSTR))
  473. continue;
  474. insns = (struct bpf_insn *) data_prog->d_buf;
  475. processed_sec[i] = true; /* relo section */
  476. if (parse_relo_and_apply(data, symbols, &shdr, insns,
  477. map_data, nr_maps))
  478. continue;
  479. }
  480. }
  481. /* load programs */
  482. for (i = 1; i < ehdr.e_shnum; i++) {
  483. if (processed_sec[i])
  484. continue;
  485. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  486. continue;
  487. if (memcmp(shname, "kprobe/", 7) == 0 ||
  488. memcmp(shname, "kretprobe/", 10) == 0 ||
  489. memcmp(shname, "tracepoint/", 11) == 0 ||
  490. memcmp(shname, "xdp", 3) == 0 ||
  491. memcmp(shname, "perf_event", 10) == 0 ||
  492. memcmp(shname, "socket", 6) == 0 ||
  493. memcmp(shname, "cgroup/", 7) == 0 ||
  494. memcmp(shname, "sockops", 7) == 0 ||
  495. memcmp(shname, "sk_skb", 6) == 0) {
  496. ret = load_and_attach(shname, data->d_buf,
  497. data->d_size);
  498. if (ret != 0)
  499. goto done;
  500. }
  501. }
  502. ret = 0;
  503. done:
  504. close(fd);
  505. return ret;
  506. }
  507. int load_bpf_file(char *path)
  508. {
  509. return do_load_bpf_file(path, NULL);
  510. }
  511. int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
  512. {
  513. return do_load_bpf_file(path, fixup_map);
  514. }
  515. void read_trace_pipe(void)
  516. {
  517. int trace_fd;
  518. trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
  519. if (trace_fd < 0)
  520. return;
  521. while (1) {
  522. static char buf[4096];
  523. ssize_t sz;
  524. sz = read(trace_fd, buf, sizeof(buf));
  525. if (sz > 0) {
  526. buf[sz] = 0;
  527. puts(buf);
  528. }
  529. }
  530. }
  531. #define MAX_SYMS 300000
  532. static struct ksym syms[MAX_SYMS];
  533. static int sym_cnt;
  534. static int ksym_cmp(const void *p1, const void *p2)
  535. {
  536. return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
  537. }
  538. int load_kallsyms(void)
  539. {
  540. FILE *f = fopen("/proc/kallsyms", "r");
  541. char func[256], buf[256];
  542. char symbol;
  543. void *addr;
  544. int i = 0;
  545. if (!f)
  546. return -ENOENT;
  547. while (!feof(f)) {
  548. if (!fgets(buf, sizeof(buf), f))
  549. break;
  550. if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
  551. break;
  552. if (!addr)
  553. continue;
  554. syms[i].addr = (long) addr;
  555. syms[i].name = strdup(func);
  556. i++;
  557. }
  558. sym_cnt = i;
  559. qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
  560. return 0;
  561. }
  562. struct ksym *ksym_search(long key)
  563. {
  564. int start = 0, end = sym_cnt;
  565. int result;
  566. while (start < end) {
  567. size_t mid = start + (end - start) / 2;
  568. result = key - syms[mid].addr;
  569. if (result < 0)
  570. end = mid;
  571. else if (result > 0)
  572. start = mid + 1;
  573. else
  574. return &syms[mid];
  575. }
  576. if (start >= 1 && syms[start - 1].addr < key &&
  577. key < syms[start].addr)
  578. /* valid ksym */
  579. return &syms[start - 1];
  580. /* out of range. return _stext */
  581. return &syms[0];
  582. }
/* Attach (or, with fd == -1, detach) an XDP program on network device
 * @ifindex by sending a RTM_SETLINK netlink request that carries a
 * nested IFLA_XDP attribute with IFLA_XDP_FD and, optionally,
 * IFLA_XDP_FLAGS.
 *
 * @ifindex: target interface index
 * @fd:      XDP program fd, or -1 to remove the program
 * @flags:   XDP attach flags; 0 for the kernel default
 *
 * Returns 0 on success, -1 on failure (an error message has been
 * printed).  NOTE(review): attribute offsets below depend on the exact
 * statement order; req.attrbuf[64] bounds the nested attributes.
 */
int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
{
	struct sockaddr_nl sa;
	int sock, seq = 0, len, ret = -1;
	char buf[4096];
	struct nlattr *nla, *nla_xdp;
	struct {
		struct nlmsghdr nh;
		struct ifinfomsg ifinfo;
		char attrbuf[64];
	} req;
	struct nlmsghdr *nh;
	struct nlmsgerr *err;

	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock < 0) {
		printf("open netlink socket: %s\n", strerror(errno));
		return -1;
	}

	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		printf("bind to netlink: %s\n", strerror(errno));
		goto cleanup;
	}

	/* build the RTM_SETLINK request; NLM_F_ACK asks the kernel to
	 * confirm with a nlmsgerr (error == 0 on success) */
	memset(&req, 0, sizeof(req));
	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.nh.nlmsg_type = RTM_SETLINK;
	req.nh.nlmsg_pid = 0;
	req.nh.nlmsg_seq = ++seq;
	req.ifinfo.ifi_family = AF_UNSPEC;
	req.ifinfo.ifi_index = ifindex;

	/* started nested attribute for XDP */
	nla = (struct nlattr *)(((char *)&req)
				+ NLMSG_ALIGN(req.nh.nlmsg_len));
	/* numeric value keeps this building against pre-IFLA_XDP headers */
	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
	nla->nla_len = NLA_HDRLEN;

	/* add XDP fd */
	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
	nla->nla_len += nla_xdp->nla_len;

	/* if user passed in any flags, add those too */
	if (flags) {
		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
		nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
		nla->nla_len += nla_xdp->nla_len;
	}

	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);

	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
		printf("send to netlink: %s\n", strerror(errno));
		goto cleanup;
	}

	len = recv(sock, buf, sizeof(buf), 0);
	if (len < 0) {
		printf("recv from netlink: %s\n", strerror(errno));
		goto cleanup;
	}

	/* walk the reply, verifying it is addressed to us and answers
	 * our sequence number; a nlmsgerr with error == 0 is the ack */
	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
	     nh = NLMSG_NEXT(nh, len)) {
		if (nh->nlmsg_pid != getpid()) {
			printf("Wrong pid %d, expected %d\n",
			       nh->nlmsg_pid, getpid());
			goto cleanup;
		}
		if (nh->nlmsg_seq != seq) {
			printf("Wrong seq %d, expected %d\n",
			       nh->nlmsg_seq, seq);
			goto cleanup;
		}
		switch (nh->nlmsg_type) {
		case NLMSG_ERROR:
			err = (struct nlmsgerr *)NLMSG_DATA(nh);
			if (!err->error)
				continue;
			printf("nlmsg error %s\n", strerror(-err->error));
			goto cleanup;
		case NLMSG_DONE:
			break;
		}
	}

	ret = 0;

cleanup:
	close(sock);
	return ret;
}