/* bpf_load.c */
  1. #include <stdio.h>
  2. #include <sys/types.h>
  3. #include <sys/stat.h>
  4. #include <fcntl.h>
  5. #include <libelf.h>
  6. #include <gelf.h>
  7. #include <errno.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <stdbool.h>
  11. #include <stdlib.h>
  12. #include <linux/bpf.h>
  13. #include <linux/filter.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/netlink.h>
  16. #include <linux/rtnetlink.h>
  17. #include <sys/types.h>
  18. #include <sys/socket.h>
  19. #include <sys/syscall.h>
  20. #include <sys/ioctl.h>
  21. #include <sys/mman.h>
  22. #include <poll.h>
  23. #include <ctype.h>
  24. #include "libbpf.h"
  25. #include "bpf_load.h"
  26. #include "perf-sys.h"
  27. #define DEBUGFS "/sys/kernel/debug/tracing/"
  28. static char license[128];
  29. static int kern_version;
  30. static bool processed_sec[128];
  31. char bpf_log_buf[BPF_LOG_BUF_SIZE];
  32. int map_fd[MAX_MAPS];
  33. int prog_fd[MAX_PROGS];
  34. int event_fd[MAX_PROGS];
  35. int prog_cnt;
  36. int prog_array_fd = -1;
  /*
   * One entry of the ELF "maps" section. load_maps() forwards every field,
   * in this order, to bpf_create_map().
   */
  struct bpf_map_def {
      unsigned int type;          /* map type, e.g. BPF_MAP_TYPE_PROG_ARRAY */
      unsigned int key_size;      /* key size argument */
      unsigned int value_size;    /* value size argument */
      unsigned int max_entries;   /* max entries argument */
      unsigned int map_flags;     /* flags argument */
  };
  44. static int populate_prog_array(const char *event, int prog_fd)
  45. {
  46. int ind = atoi(event), err;
  47. err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY);
  48. if (err < 0) {
  49. printf("failed to store prog_fd in prog_array\n");
  50. return -1;
  51. }
  52. return 0;
  53. }
  54. static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
  55. {
  56. bool is_socket = strncmp(event, "socket", 6) == 0;
  57. bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
  58. bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
  59. bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
  60. bool is_xdp = strncmp(event, "xdp", 3) == 0;
  61. bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
  62. bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
  63. bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
  64. size_t insns_cnt = size / sizeof(struct bpf_insn);
  65. enum bpf_prog_type prog_type;
  66. char buf[256];
  67. int fd, efd, err, id;
  68. struct perf_event_attr attr = {};
  69. attr.type = PERF_TYPE_TRACEPOINT;
  70. attr.sample_type = PERF_SAMPLE_RAW;
  71. attr.sample_period = 1;
  72. attr.wakeup_events = 1;
  73. if (is_socket) {
  74. prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  75. } else if (is_kprobe || is_kretprobe) {
  76. prog_type = BPF_PROG_TYPE_KPROBE;
  77. } else if (is_tracepoint) {
  78. prog_type = BPF_PROG_TYPE_TRACEPOINT;
  79. } else if (is_xdp) {
  80. prog_type = BPF_PROG_TYPE_XDP;
  81. } else if (is_perf_event) {
  82. prog_type = BPF_PROG_TYPE_PERF_EVENT;
  83. } else if (is_cgroup_skb) {
  84. prog_type = BPF_PROG_TYPE_CGROUP_SKB;
  85. } else if (is_cgroup_sk) {
  86. prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
  87. } else {
  88. printf("Unknown event '%s'\n", event);
  89. return -1;
  90. }
  91. fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
  92. bpf_log_buf, BPF_LOG_BUF_SIZE);
  93. if (fd < 0) {
  94. printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
  95. return -1;
  96. }
  97. prog_fd[prog_cnt++] = fd;
  98. if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
  99. return 0;
  100. if (is_socket) {
  101. event += 6;
  102. if (*event != '/')
  103. return 0;
  104. event++;
  105. if (!isdigit(*event)) {
  106. printf("invalid prog number\n");
  107. return -1;
  108. }
  109. return populate_prog_array(event, fd);
  110. }
  111. if (is_kprobe || is_kretprobe) {
  112. if (is_kprobe)
  113. event += 7;
  114. else
  115. event += 10;
  116. if (*event == 0) {
  117. printf("event name cannot be empty\n");
  118. return -1;
  119. }
  120. if (isdigit(*event))
  121. return populate_prog_array(event, fd);
  122. snprintf(buf, sizeof(buf),
  123. "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
  124. is_kprobe ? 'p' : 'r', event, event);
  125. err = system(buf);
  126. if (err < 0) {
  127. printf("failed to create kprobe '%s' error '%s'\n",
  128. event, strerror(errno));
  129. return -1;
  130. }
  131. strcpy(buf, DEBUGFS);
  132. strcat(buf, "events/kprobes/");
  133. strcat(buf, event);
  134. strcat(buf, "/id");
  135. } else if (is_tracepoint) {
  136. event += 11;
  137. if (*event == 0) {
  138. printf("event name cannot be empty\n");
  139. return -1;
  140. }
  141. strcpy(buf, DEBUGFS);
  142. strcat(buf, "events/");
  143. strcat(buf, event);
  144. strcat(buf, "/id");
  145. }
  146. efd = open(buf, O_RDONLY, 0);
  147. if (efd < 0) {
  148. printf("failed to open event %s\n", event);
  149. return -1;
  150. }
  151. err = read(efd, buf, sizeof(buf));
  152. if (err < 0 || err >= sizeof(buf)) {
  153. printf("read from '%s' failed '%s'\n", event, strerror(errno));
  154. return -1;
  155. }
  156. close(efd);
  157. buf[err] = 0;
  158. id = atoi(buf);
  159. attr.config = id;
  160. efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
  161. if (efd < 0) {
  162. printf("event %d fd %d err %s\n", id, efd, strerror(errno));
  163. return -1;
  164. }
  165. event_fd[prog_cnt - 1] = efd;
  166. ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
  167. ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
  168. return 0;
  169. }
  170. static int load_maps(struct bpf_map_def *maps, int len)
  171. {
  172. int i;
  173. for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
  174. map_fd[i] = bpf_create_map(maps[i].type,
  175. maps[i].key_size,
  176. maps[i].value_size,
  177. maps[i].max_entries,
  178. maps[i].map_flags);
  179. if (map_fd[i] < 0) {
  180. printf("failed to create a map: %d %s\n",
  181. errno, strerror(errno));
  182. return 1;
  183. }
  184. if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
  185. prog_array_fd = map_fd[i];
  186. }
  187. return 0;
  188. }
  189. static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
  190. GElf_Shdr *shdr, Elf_Data **data)
  191. {
  192. Elf_Scn *scn;
  193. scn = elf_getscn(elf, i);
  194. if (!scn)
  195. return 1;
  196. if (gelf_getshdr(scn, shdr) != shdr)
  197. return 2;
  198. *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
  199. if (!*shname || !shdr->sh_size)
  200. return 3;
  201. *data = elf_getdata(scn, 0);
  202. if (!*data || elf_getdata(scn, *data) != NULL)
  203. return 4;
  204. return 0;
  205. }
  206. static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
  207. GElf_Shdr *shdr, struct bpf_insn *insn)
  208. {
  209. int i, nrels;
  210. nrels = shdr->sh_size / shdr->sh_entsize;
  211. for (i = 0; i < nrels; i++) {
  212. GElf_Sym sym;
  213. GElf_Rel rel;
  214. unsigned int insn_idx;
  215. gelf_getrel(data, i, &rel);
  216. insn_idx = rel.r_offset / sizeof(struct bpf_insn);
  217. gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
  218. if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
  219. printf("invalid relo for insn[%d].code 0x%x\n",
  220. insn_idx, insn[insn_idx].code);
  221. return 1;
  222. }
  223. insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
  224. insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
  225. }
  226. return 0;
  227. }
  228. int load_bpf_file(char *path)
  229. {
  230. int fd, i;
  231. Elf *elf;
  232. GElf_Ehdr ehdr;
  233. GElf_Shdr shdr, shdr_prog;
  234. Elf_Data *data, *data_prog, *symbols = NULL;
  235. char *shname, *shname_prog;
  236. if (elf_version(EV_CURRENT) == EV_NONE)
  237. return 1;
  238. fd = open(path, O_RDONLY, 0);
  239. if (fd < 0)
  240. return 1;
  241. elf = elf_begin(fd, ELF_C_READ, NULL);
  242. if (!elf)
  243. return 1;
  244. if (gelf_getehdr(elf, &ehdr) != &ehdr)
  245. return 1;
  246. /* clear all kprobes */
  247. i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
  248. /* scan over all elf sections to get license and map info */
  249. for (i = 1; i < ehdr.e_shnum; i++) {
  250. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  251. continue;
  252. if (0) /* helpful for llvm debugging */
  253. printf("section %d:%s data %p size %zd link %d flags %d\n",
  254. i, shname, data->d_buf, data->d_size,
  255. shdr.sh_link, (int) shdr.sh_flags);
  256. if (strcmp(shname, "license") == 0) {
  257. processed_sec[i] = true;
  258. memcpy(license, data->d_buf, data->d_size);
  259. } else if (strcmp(shname, "version") == 0) {
  260. processed_sec[i] = true;
  261. if (data->d_size != sizeof(int)) {
  262. printf("invalid size of version section %zd\n",
  263. data->d_size);
  264. return 1;
  265. }
  266. memcpy(&kern_version, data->d_buf, sizeof(int));
  267. } else if (strcmp(shname, "maps") == 0) {
  268. processed_sec[i] = true;
  269. if (load_maps(data->d_buf, data->d_size))
  270. return 1;
  271. } else if (shdr.sh_type == SHT_SYMTAB) {
  272. symbols = data;
  273. }
  274. }
  275. /* load programs that need map fixup (relocations) */
  276. for (i = 1; i < ehdr.e_shnum; i++) {
  277. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  278. continue;
  279. if (shdr.sh_type == SHT_REL) {
  280. struct bpf_insn *insns;
  281. if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
  282. &shdr_prog, &data_prog))
  283. continue;
  284. if (shdr_prog.sh_type != SHT_PROGBITS ||
  285. !(shdr_prog.sh_flags & SHF_EXECINSTR))
  286. continue;
  287. insns = (struct bpf_insn *) data_prog->d_buf;
  288. processed_sec[shdr.sh_info] = true;
  289. processed_sec[i] = true;
  290. if (parse_relo_and_apply(data, symbols, &shdr, insns))
  291. continue;
  292. if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
  293. memcmp(shname_prog, "kretprobe/", 10) == 0 ||
  294. memcmp(shname_prog, "tracepoint/", 11) == 0 ||
  295. memcmp(shname_prog, "xdp", 3) == 0 ||
  296. memcmp(shname_prog, "perf_event", 10) == 0 ||
  297. memcmp(shname_prog, "socket", 6) == 0 ||
  298. memcmp(shname_prog, "cgroup/", 7) == 0)
  299. load_and_attach(shname_prog, insns, data_prog->d_size);
  300. }
  301. }
  302. /* load programs that don't use maps */
  303. for (i = 1; i < ehdr.e_shnum; i++) {
  304. if (processed_sec[i])
  305. continue;
  306. if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
  307. continue;
  308. if (memcmp(shname, "kprobe/", 7) == 0 ||
  309. memcmp(shname, "kretprobe/", 10) == 0 ||
  310. memcmp(shname, "tracepoint/", 11) == 0 ||
  311. memcmp(shname, "xdp", 3) == 0 ||
  312. memcmp(shname, "perf_event", 10) == 0 ||
  313. memcmp(shname, "socket", 6) == 0 ||
  314. memcmp(shname, "cgroup/", 7) == 0)
  315. load_and_attach(shname, data->d_buf, data->d_size);
  316. }
  317. close(fd);
  318. return 0;
  319. }
  320. void read_trace_pipe(void)
  321. {
  322. int trace_fd;
  323. trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
  324. if (trace_fd < 0)
  325. return;
  326. while (1) {
  327. static char buf[4096];
  328. ssize_t sz;
  329. sz = read(trace_fd, buf, sizeof(buf));
  330. if (sz > 0) {
  331. buf[sz] = 0;
  332. puts(buf);
  333. }
  334. }
  335. }
  336. #define MAX_SYMS 300000
  337. static struct ksym syms[MAX_SYMS];
  338. static int sym_cnt;
  339. static int ksym_cmp(const void *p1, const void *p2)
  340. {
  341. return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
  342. }
  343. int load_kallsyms(void)
  344. {
  345. FILE *f = fopen("/proc/kallsyms", "r");
  346. char func[256], buf[256];
  347. char symbol;
  348. void *addr;
  349. int i = 0;
  350. if (!f)
  351. return -ENOENT;
  352. while (!feof(f)) {
  353. if (!fgets(buf, sizeof(buf), f))
  354. break;
  355. if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
  356. break;
  357. if (!addr)
  358. continue;
  359. syms[i].addr = (long) addr;
  360. syms[i].name = strdup(func);
  361. i++;
  362. }
  363. sym_cnt = i;
  364. qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
  365. return 0;
  366. }
  367. struct ksym *ksym_search(long key)
  368. {
  369. int start = 0, end = sym_cnt;
  370. int result;
  371. while (start < end) {
  372. size_t mid = start + (end - start) / 2;
  373. result = key - syms[mid].addr;
  374. if (result < 0)
  375. end = mid;
  376. else if (result > 0)
  377. start = mid + 1;
  378. else
  379. return &syms[mid];
  380. }
  381. if (start >= 1 && syms[start - 1].addr < key &&
  382. key < syms[start].addr)
  383. /* valid ksym */
  384. return &syms[start - 1];
  385. /* out of range. return _stext */
  386. return &syms[0];
  387. }
  388. int set_link_xdp_fd(int ifindex, int fd)
  389. {
  390. struct sockaddr_nl sa;
  391. int sock, seq = 0, len, ret = -1;
  392. char buf[4096];
  393. struct nlattr *nla, *nla_xdp;
  394. struct {
  395. struct nlmsghdr nh;
  396. struct ifinfomsg ifinfo;
  397. char attrbuf[64];
  398. } req;
  399. struct nlmsghdr *nh;
  400. struct nlmsgerr *err;
  401. memset(&sa, 0, sizeof(sa));
  402. sa.nl_family = AF_NETLINK;
  403. sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  404. if (sock < 0) {
  405. printf("open netlink socket: %s\n", strerror(errno));
  406. return -1;
  407. }
  408. if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
  409. printf("bind to netlink: %s\n", strerror(errno));
  410. goto cleanup;
  411. }
  412. memset(&req, 0, sizeof(req));
  413. req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
  414. req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
  415. req.nh.nlmsg_type = RTM_SETLINK;
  416. req.nh.nlmsg_pid = 0;
  417. req.nh.nlmsg_seq = ++seq;
  418. req.ifinfo.ifi_family = AF_UNSPEC;
  419. req.ifinfo.ifi_index = ifindex;
  420. nla = (struct nlattr *)(((char *)&req)
  421. + NLMSG_ALIGN(req.nh.nlmsg_len));
  422. nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
  423. nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
  424. nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
  425. nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
  426. memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
  427. nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
  428. req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
  429. if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
  430. printf("send to netlink: %s\n", strerror(errno));
  431. goto cleanup;
  432. }
  433. len = recv(sock, buf, sizeof(buf), 0);
  434. if (len < 0) {
  435. printf("recv from netlink: %s\n", strerror(errno));
  436. goto cleanup;
  437. }
  438. for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
  439. nh = NLMSG_NEXT(nh, len)) {
  440. if (nh->nlmsg_pid != getpid()) {
  441. printf("Wrong pid %d, expected %d\n",
  442. nh->nlmsg_pid, getpid());
  443. goto cleanup;
  444. }
  445. if (nh->nlmsg_seq != seq) {
  446. printf("Wrong seq %d, expected %d\n",
  447. nh->nlmsg_seq, seq);
  448. goto cleanup;
  449. }
  450. switch (nh->nlmsg_type) {
  451. case NLMSG_ERROR:
  452. err = (struct nlmsgerr *)NLMSG_DATA(nh);
  453. if (!err->error)
  454. continue;
  455. printf("nlmsg error %s\n", strerror(-err->error));
  456. goto cleanup;
  457. case NLMSG_DONE:
  458. break;
  459. }
  460. }
  461. ret = 0;
  462. cleanup:
  463. close(sock);
  464. return ret;
  465. }