/* bpf_load.c */
  1. #include <stdio.h>
  2. #include <sys/types.h>
  3. #include <sys/stat.h>
  4. #include <fcntl.h>
  5. #include <libelf.h>
  6. #include <gelf.h>
  7. #include <errno.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <stdbool.h>
  11. #include <stdlib.h>
  12. #include <linux/bpf.h>
  13. #include <linux/filter.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/netlink.h>
  16. #include <linux/rtnetlink.h>
  17. #include <linux/types.h>
  18. #include <sys/types.h>
  19. #include <sys/socket.h>
  20. #include <sys/syscall.h>
  21. #include <sys/ioctl.h>
  22. #include <sys/mman.h>
  23. #include <poll.h>
  24. #include <ctype.h>
  25. #include <assert.h>
  26. #include "libbpf.h"
  27. #include "bpf_load.h"
  28. #include "perf-sys.h"
  29. #define DEBUGFS "/sys/kernel/debug/tracing/"
  30. static char license[128];
  31. static int kern_version;
  32. static bool processed_sec[128];
  33. char bpf_log_buf[BPF_LOG_BUF_SIZE];
  34. int map_fd[MAX_MAPS];
  35. int prog_fd[MAX_PROGS];
  36. int event_fd[MAX_PROGS];
  37. int prog_cnt;
  38. int prog_array_fd = -1;
  39. struct bpf_map_data map_data[MAX_MAPS];
  40. int map_data_count = 0;
  41. static int populate_prog_array(const char *event, int prog_fd)
  42. {
  43. int ind = atoi(event), err;
  44. err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
  45. if (err < 0) {
  46. printf("failed to store prog_fd in prog_array\n");
  47. return -1;
  48. }
  49. return 0;
  50. }
  51. static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
  52. {
  53. bool is_socket = strncmp(event, "socket", 6) == 0;
  54. bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
  55. bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
  56. bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
  57. bool is_xdp = strncmp(event, "xdp", 3) == 0;
  58. bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
  59. bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
  60. bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
  61. bool is_sockops = strncmp(event, "sockops", 7) == 0;
  62. size_t insns_cnt = size / sizeof(struct bpf_insn);
  63. enum bpf_prog_type prog_type;
  64. char buf[256];
  65. int fd, efd, err, id;
  66. struct perf_event_attr attr = {};
  67. attr.type = PERF_TYPE_TRACEPOINT;
  68. attr.sample_type = PERF_SAMPLE_RAW;
  69. attr.sample_period = 1;
  70. attr.wakeup_events = 1;
  71. if (is_socket) {
  72. prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  73. } else if (is_kprobe || is_kretprobe) {
  74. prog_type = BPF_PROG_TYPE_KPROBE;
  75. } else if (is_tracepoint) {
  76. prog_type = BPF_PROG_TYPE_TRACEPOINT;
  77. } else if (is_xdp) {
  78. prog_type = BPF_PROG_TYPE_XDP;
  79. } else if (is_perf_event) {
  80. prog_type = BPF_PROG_TYPE_PERF_EVENT;
  81. } else if (is_cgroup_skb) {
  82. prog_type = BPF_PROG_TYPE_CGROUP_SKB;
  83. } else if (is_cgroup_sk) {
  84. prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
  85. } else if (is_sockops) {
  86. prog_type = BPF_PROG_TYPE_SOCK_OPS;
  87. } else {
  88. printf("Unknown event '%s'\n", event);
  89. return -1;
  90. }
  91. fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
  92. bpf_log_buf, BPF_LOG_BUF_SIZE);
  93. if (fd < 0) {
  94. printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
  95. return -1;
  96. }
  97. prog_fd[prog_cnt++] = fd;
  98. if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
  99. return 0;
  100. if (is_socket || is_sockops) {
  101. if (is_socket)
  102. event += 6;
  103. else
  104. event += 7;
  105. if (*event != '/')
  106. return 0;
  107. event++;
  108. if (!isdigit(*event)) {
  109. printf("invalid prog number\n");
  110. return -1;
  111. }
  112. return populate_prog_array(event, fd);
  113. }
  114. if (is_kprobe || is_kretprobe) {
  115. if (is_kprobe)
  116. event += 7;
  117. else
  118. event += 10;
  119. if (*event == 0) {
  120. printf("event name cannot be empty\n");
  121. return -1;
  122. }
  123. if (isdigit(*event))
  124. return populate_prog_array(event, fd);
  125. snprintf(buf, sizeof(buf),
  126. "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
  127. is_kprobe ? 'p' : 'r', event, event);
  128. err = system(buf);
  129. if (err < 0) {
  130. printf("failed to create kprobe '%s' error '%s'\n",
  131. event, strerror(errno));
  132. return -1;
  133. }
  134. strcpy(buf, DEBUGFS);
  135. strcat(buf, "events/kprobes/");
  136. strcat(buf, event);
  137. strcat(buf, "/id");
  138. } else if (is_tracepoint) {
  139. event += 11;
  140. if (*event == 0) {
  141. printf("event name cannot be empty\n");
  142. return -1;
  143. }
  144. strcpy(buf, DEBUGFS);
  145. strcat(buf, "events/");
  146. strcat(buf, event);
  147. strcat(buf, "/id");
  148. }
  149. efd = open(buf, O_RDONLY, 0);
  150. if (efd < 0) {
  151. printf("failed to open event %s\n", event);
  152. return -1;
  153. }
  154. err = read(efd, buf, sizeof(buf));
  155. if (err < 0 || err >= sizeof(buf)) {
  156. printf("read from '%s' failed '%s'\n", event, strerror(errno));
  157. return -1;
  158. }
  159. close(efd);
  160. buf[err] = 0;
  161. id = atoi(buf);
  162. attr.config = id;
  163. efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
  164. if (efd < 0) {
  165. printf("event %d fd %d err %s\n", id, efd, strerror(errno));
  166. return -1;
  167. }
  168. event_fd[prog_cnt - 1] = efd;
  169. ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
  170. ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
  171. return 0;
  172. }
  173. static int load_maps(struct bpf_map_data *maps, int nr_maps,
  174. fixup_map_cb fixup_map)
  175. {
  176. int i;
  177. for (i = 0; i < nr_maps; i++) {
  178. if (fixup_map) {
  179. fixup_map(&maps[i], i);
  180. /* Allow userspace to assign map FD prior to creation */
  181. if (maps[i].fd != -1) {
  182. map_fd[i] = maps[i].fd;
  183. continue;
  184. }
  185. }
  186. if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
  187. maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
  188. int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
  189. map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
  190. maps[i].def.key_size,
  191. inner_map_fd,
  192. maps[i].def.max_entries,
  193. maps[i].def.map_flags);
  194. } else {
  195. map_fd[i] = bpf_create_map(maps[i].def.type,
  196. maps[i].def.key_size,
  197. maps[i].def.value_size,
  198. maps[i].def.max_entries,
  199. maps[i].def.map_flags);
  200. }
  201. if (map_fd[i] < 0) {
  202. printf("failed to create a map: %d %s\n",
  203. errno, strerror(errno));
  204. return 1;
  205. }
  206. maps[i].fd = map_fd[i];
  207. if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
  208. prog_array_fd = map_fd[i];
  209. }
  210. return 0;
  211. }
  212. static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
  213. GElf_Shdr *shdr, Elf_Data **data)
  214. {
  215. Elf_Scn *scn;
  216. scn = elf_getscn(elf, i);
  217. if (!scn)
  218. return 1;
  219. if (gelf_getshdr(scn, shdr) != shdr)
  220. return 2;
  221. *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
  222. if (!*shname || !shdr->sh_size)
  223. return 3;
  224. *data = elf_getdata(scn, 0);
  225. if (!*data || elf_getdata(scn, *data) != NULL)
  226. return 4;
  227. return 0;
  228. }
  229. static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
  230. GElf_Shdr *shdr, struct bpf_insn *insn,
  231. struct bpf_map_data *maps, int nr_maps)
  232. {
  233. int i, nrels;
  234. nrels = shdr->sh_size / shdr->sh_entsize;
  235. for (i = 0; i < nrels; i++) {
  236. GElf_Sym sym;
  237. GElf_Rel rel;
  238. unsigned int insn_idx;
  239. bool match = false;
  240. int j, map_idx;
  241. gelf_getrel(data, i, &rel);
  242. insn_idx = rel.r_offset / sizeof(struct bpf_insn);
  243. gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
  244. if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
  245. printf("invalid relo for insn[%d].code 0x%x\n",
  246. insn_idx, insn[insn_idx].code);
  247. return 1;
  248. }
  249. insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
  250. /* Match FD relocation against recorded map_data[] offset */
  251. for (map_idx = 0; map_idx < nr_maps; map_idx++) {
  252. if (maps[map_idx].elf_offset == sym.st_value) {
  253. match = true;
  254. break;
  255. }
  256. }
  257. if (match) {
  258. insn[insn_idx].imm = maps[map_idx].fd;
  259. } else {
  260. printf("invalid relo for insn[%d] no map_data match\n",
  261. insn_idx);
  262. return 1;
  263. }
  264. }
  265. return 0;
  266. }
  267. static int cmp_symbols(const void *l, const void *r)
  268. {
  269. const GElf_Sym *lsym = (const GElf_Sym *)l;
  270. const GElf_Sym *rsym = (const GElf_Sym *)r;
  271. if (lsym->st_value < rsym->st_value)
  272. return -1;
  273. else if (lsym->st_value > rsym->st_value)
  274. return 1;
  275. else
  276. return 0;
  277. }
  278. static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
  279. Elf *elf, Elf_Data *symbols, int strtabidx)
  280. {
  281. int map_sz_elf, map_sz_copy;
  282. bool validate_zero = false;
  283. Elf_Data *data_maps;
  284. int i, nr_maps;
  285. GElf_Sym *sym;
  286. Elf_Scn *scn;
  287. int copy_sz;
  288. if (maps_shndx < 0)
  289. return -EINVAL;
  290. if (!symbols)
  291. return -EINVAL;
  292. /* Get data for maps section via elf index */
  293. scn = elf_getscn(elf, maps_shndx);
  294. if (scn)
  295. data_maps = elf_getdata(scn, NULL);
  296. if (!scn || !data_maps) {
  297. printf("Failed to get Elf_Data from maps section %d\n",
  298. maps_shndx);
  299. return -EINVAL;
  300. }
  301. /* For each map get corrosponding symbol table entry */
  302. sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
  303. for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
  304. assert(nr_maps < MAX_MAPS+1);
  305. if (!gelf_getsym(symbols, i, &sym[nr_maps]))
  306. continue;
  307. if (sym[nr_maps].st_shndx != maps_shndx)
  308. continue;
  309. /* Only increment iif maps section */
  310. nr_maps++;
  311. }
  312. /* Align to map_fd[] order, via sort on offset in sym.st_value */
  313. qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
  314. /* Keeping compatible with ELF maps section changes
  315. * ------------------------------------------------
  316. * The program size of struct bpf_map_def is known by loader
  317. * code, but struct stored in ELF file can be different.
  318. *
  319. * Unfortunately sym[i].st_size is zero. To calculate the
  320. * struct size stored in the ELF file, assume all struct have
  321. * the same size, and simply divide with number of map
  322. * symbols.
  323. */
  324. map_sz_elf = data_maps->d_size / nr_maps;
  325. map_sz_copy = sizeof(struct bpf_map_def);
  326. if (map_sz_elf < map_sz_copy) {
  327. /*
  328. * Backward compat, loading older ELF file with
  329. * smaller struct, keeping remaining bytes zero.
  330. */
  331. map_sz_copy = map_sz_elf;
  332. } else if (map_sz_elf > map_sz_copy) {
  333. /*
  334. * Forward compat, loading newer ELF file with larger
  335. * struct with unknown features. Assume zero means
  336. * feature not used. Thus, validate rest of struct
  337. * data is zero.
  338. */
  339. validate_zero = true;
  340. }
  341. /* Memcpy relevant part of ELF maps data to loader maps */
  342. for (i = 0; i < nr_maps; i++) {
  343. unsigned char *addr, *end;
  344. struct bpf_map_def *def;
  345. const char *map_name;
  346. size_t offset;
  347. map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
  348. maps[i].name = strdup(map_name);
  349. if (!maps[i].name) {
  350. printf("strdup(%s): %s(%d)\n", map_name,
  351. strerror(errno), errno);
  352. free(sym);
  353. return -errno;
  354. }
  355. /* Symbol value is offset into ELF maps section data area */
  356. offset = sym[i].st_value;
  357. def = (struct bpf_map_def *)(data_maps->d_buf + offset);
  358. maps[i].elf_offset = offset;
  359. memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
  360. memcpy(&maps[i].def, def, map_sz_copy);
  361. /* Verify no newer features were requested */
  362. if (validate_zero) {
  363. addr = (unsigned char*) def + map_sz_copy;
  364. end = (unsigned char*) def + map_sz_elf;
  365. for (; addr < end; addr++) {
  366. if (*addr != 0) {
  367. free(sym);
  368. return -EFBIG;
  369. }
  370. }
  371. }
  372. }
  373. free(sym);
  374. return nr_maps;
  375. }
  376. static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
  377. {
  378. int fd, i, ret, maps_shndx = -1, strtabidx = -1;
  379. Elf *elf;
  380. GElf_Ehdr ehdr;
  381. GElf_Shdr shdr, shdr_prog;
  382. Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
  383. char *shname, *shname_prog;
  384. int nr_maps = 0;
  385. /* reset global variables */
  386. kern_version = 0;
  387. memset(license, 0, sizeof(license));
  388. memset(processed_sec, 0, sizeof(processed_sec));
  389. if (elf_version(EV_CURRENT) == EV_NONE)
  390. return 1;
  391. fd = open(path, O_RDONLY, 0);
  392. if (fd < 0)
  393. return 1;
  394. elf = elf_begin(fd, ELF_C_READ, NULL);
  395. if (!elf)
  396. return 1;
  397. if (gelf_getehdr(elf, &ehdr) != &ehdr)
  398. return 1;
  399. /* clear all kprobes */
  400. i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
  401. /* scan over all elf sections to get license and map info */
  402. for (i = 1; i < ehdr.e_shnum; i++) {
  403. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  404. continue;
  405. if (0) /* helpful for llvm debugging */
  406. printf("section %d:%s data %p size %zd link %d flags %d\n",
  407. i, shname, data->d_buf, data->d_size,
  408. shdr.sh_link, (int) shdr.sh_flags);
  409. if (strcmp(shname, "license") == 0) {
  410. processed_sec[i] = true;
  411. memcpy(license, data->d_buf, data->d_size);
  412. } else if (strcmp(shname, "version") == 0) {
  413. processed_sec[i] = true;
  414. if (data->d_size != sizeof(int)) {
  415. printf("invalid size of version section %zd\n",
  416. data->d_size);
  417. return 1;
  418. }
  419. memcpy(&kern_version, data->d_buf, sizeof(int));
  420. } else if (strcmp(shname, "maps") == 0) {
  421. int j;
  422. maps_shndx = i;
  423. data_maps = data;
  424. for (j = 0; j < MAX_MAPS; j++)
  425. map_data[j].fd = -1;
  426. } else if (shdr.sh_type == SHT_SYMTAB) {
  427. strtabidx = shdr.sh_link;
  428. symbols = data;
  429. }
  430. }
  431. ret = 1;
  432. if (!symbols) {
  433. printf("missing SHT_SYMTAB section\n");
  434. goto done;
  435. }
  436. if (data_maps) {
  437. nr_maps = load_elf_maps_section(map_data, maps_shndx,
  438. elf, symbols, strtabidx);
  439. if (nr_maps < 0) {
  440. printf("Error: Failed loading ELF maps (errno:%d):%s\n",
  441. nr_maps, strerror(-nr_maps));
  442. ret = 1;
  443. goto done;
  444. }
  445. if (load_maps(map_data, nr_maps, fixup_map))
  446. goto done;
  447. map_data_count = nr_maps;
  448. processed_sec[maps_shndx] = true;
  449. }
  450. /* process all relo sections, and rewrite bpf insns for maps */
  451. for (i = 1; i < ehdr.e_shnum; i++) {
  452. if (processed_sec[i])
  453. continue;
  454. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  455. continue;
  456. if (shdr.sh_type == SHT_REL) {
  457. struct bpf_insn *insns;
  458. /* locate prog sec that need map fixup (relocations) */
  459. if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
  460. &shdr_prog, &data_prog))
  461. continue;
  462. if (shdr_prog.sh_type != SHT_PROGBITS ||
  463. !(shdr_prog.sh_flags & SHF_EXECINSTR))
  464. continue;
  465. insns = (struct bpf_insn *) data_prog->d_buf;
  466. processed_sec[i] = true; /* relo section */
  467. if (parse_relo_and_apply(data, symbols, &shdr, insns,
  468. map_data, nr_maps))
  469. continue;
  470. }
  471. }
  472. /* load programs */
  473. for (i = 1; i < ehdr.e_shnum; i++) {
  474. if (processed_sec[i])
  475. continue;
  476. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  477. continue;
  478. if (memcmp(shname, "kprobe/", 7) == 0 ||
  479. memcmp(shname, "kretprobe/", 10) == 0 ||
  480. memcmp(shname, "tracepoint/", 11) == 0 ||
  481. memcmp(shname, "xdp", 3) == 0 ||
  482. memcmp(shname, "perf_event", 10) == 0 ||
  483. memcmp(shname, "socket", 6) == 0 ||
  484. memcmp(shname, "cgroup/", 7) == 0 ||
  485. memcmp(shname, "sockops", 7) == 0) {
  486. ret = load_and_attach(shname, data->d_buf,
  487. data->d_size);
  488. if (ret != 0)
  489. goto done;
  490. }
  491. }
  492. ret = 0;
  493. done:
  494. close(fd);
  495. return ret;
  496. }
  497. int load_bpf_file(char *path)
  498. {
  499. return do_load_bpf_file(path, NULL);
  500. }
  501. int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
  502. {
  503. return do_load_bpf_file(path, fixup_map);
  504. }
  505. void read_trace_pipe(void)
  506. {
  507. int trace_fd;
  508. trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
  509. if (trace_fd < 0)
  510. return;
  511. while (1) {
  512. static char buf[4096];
  513. ssize_t sz;
  514. sz = read(trace_fd, buf, sizeof(buf));
  515. if (sz > 0) {
  516. buf[sz] = 0;
  517. puts(buf);
  518. }
  519. }
  520. }
  521. #define MAX_SYMS 300000
  522. static struct ksym syms[MAX_SYMS];
  523. static int sym_cnt;
  524. static int ksym_cmp(const void *p1, const void *p2)
  525. {
  526. return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
  527. }
  528. int load_kallsyms(void)
  529. {
  530. FILE *f = fopen("/proc/kallsyms", "r");
  531. char func[256], buf[256];
  532. char symbol;
  533. void *addr;
  534. int i = 0;
  535. if (!f)
  536. return -ENOENT;
  537. while (!feof(f)) {
  538. if (!fgets(buf, sizeof(buf), f))
  539. break;
  540. if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
  541. break;
  542. if (!addr)
  543. continue;
  544. syms[i].addr = (long) addr;
  545. syms[i].name = strdup(func);
  546. i++;
  547. }
  548. sym_cnt = i;
  549. qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
  550. return 0;
  551. }
  552. struct ksym *ksym_search(long key)
  553. {
  554. int start = 0, end = sym_cnt;
  555. int result;
  556. while (start < end) {
  557. size_t mid = start + (end - start) / 2;
  558. result = key - syms[mid].addr;
  559. if (result < 0)
  560. end = mid;
  561. else if (result > 0)
  562. start = mid + 1;
  563. else
  564. return &syms[mid];
  565. }
  566. if (start >= 1 && syms[start - 1].addr < key &&
  567. key < syms[start].addr)
  568. /* valid ksym */
  569. return &syms[start - 1];
  570. /* out of range. return _stext */
  571. return &syms[0];
  572. }
  573. int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
  574. {
  575. struct sockaddr_nl sa;
  576. int sock, seq = 0, len, ret = -1;
  577. char buf[4096];
  578. struct nlattr *nla, *nla_xdp;
  579. struct {
  580. struct nlmsghdr nh;
  581. struct ifinfomsg ifinfo;
  582. char attrbuf[64];
  583. } req;
  584. struct nlmsghdr *nh;
  585. struct nlmsgerr *err;
  586. memset(&sa, 0, sizeof(sa));
  587. sa.nl_family = AF_NETLINK;
  588. sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  589. if (sock < 0) {
  590. printf("open netlink socket: %s\n", strerror(errno));
  591. return -1;
  592. }
  593. if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
  594. printf("bind to netlink: %s\n", strerror(errno));
  595. goto cleanup;
  596. }
  597. memset(&req, 0, sizeof(req));
  598. req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
  599. req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
  600. req.nh.nlmsg_type = RTM_SETLINK;
  601. req.nh.nlmsg_pid = 0;
  602. req.nh.nlmsg_seq = ++seq;
  603. req.ifinfo.ifi_family = AF_UNSPEC;
  604. req.ifinfo.ifi_index = ifindex;
  605. /* started nested attribute for XDP */
  606. nla = (struct nlattr *)(((char *)&req)
  607. + NLMSG_ALIGN(req.nh.nlmsg_len));
  608. nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
  609. nla->nla_len = NLA_HDRLEN;
  610. /* add XDP fd */
  611. nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
  612. nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
  613. nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
  614. memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
  615. nla->nla_len += nla_xdp->nla_len;
  616. /* if user passed in any flags, add those too */
  617. if (flags) {
  618. nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
  619. nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
  620. nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
  621. memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
  622. nla->nla_len += nla_xdp->nla_len;
  623. }
  624. req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
  625. if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
  626. printf("send to netlink: %s\n", strerror(errno));
  627. goto cleanup;
  628. }
  629. len = recv(sock, buf, sizeof(buf), 0);
  630. if (len < 0) {
  631. printf("recv from netlink: %s\n", strerror(errno));
  632. goto cleanup;
  633. }
  634. for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
  635. nh = NLMSG_NEXT(nh, len)) {
  636. if (nh->nlmsg_pid != getpid()) {
  637. printf("Wrong pid %d, expected %d\n",
  638. nh->nlmsg_pid, getpid());
  639. goto cleanup;
  640. }
  641. if (nh->nlmsg_seq != seq) {
  642. printf("Wrong seq %d, expected %d\n",
  643. nh->nlmsg_seq, seq);
  644. goto cleanup;
  645. }
  646. switch (nh->nlmsg_type) {
  647. case NLMSG_ERROR:
  648. err = (struct nlmsgerr *)NLMSG_DATA(nh);
  649. if (!err->error)
  650. continue;
  651. printf("nlmsg error %s\n", strerror(-err->error));
  652. goto cleanup;
  653. case NLMSG_DONE:
  654. break;
  655. }
  656. }
  657. ret = 0;
  658. cleanup:
  659. close(sock);
  660. return ret;
  661. }