reuseport_bpf.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*
  2. * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
  3. * a BPF program (both classic and extended) to read the first word from an
  4. * incoming packet (expected to be in network byte-order), calculate a modulus
  5. * of that number, and then dispatch the packet to the Nth socket using the
  6. * result. These tests are run for each supported address family and protocol.
  7. * Additionally, a few edge cases in the implementation are tested.
  8. */
#include <errno.h>
#include <error.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
  22. #ifndef ARRAY_SIZE
  23. #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
  24. #endif
  25. struct test_params {
  26. int recv_family;
  27. int send_family;
  28. int protocol;
  29. size_t recv_socks;
  30. uint16_t recv_port;
  31. uint16_t send_port_min;
  32. };
  33. static size_t sockaddr_size(void)
  34. {
  35. return sizeof(struct sockaddr_storage);
  36. }
  37. static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
  38. {
  39. struct sockaddr_storage *addr;
  40. struct sockaddr_in *addr4;
  41. struct sockaddr_in6 *addr6;
  42. addr = malloc(sizeof(struct sockaddr_storage));
  43. memset(addr, 0, sizeof(struct sockaddr_storage));
  44. switch (family) {
  45. case AF_INET:
  46. addr4 = (struct sockaddr_in *)addr;
  47. addr4->sin_family = AF_INET;
  48. addr4->sin_addr.s_addr = htonl(INADDR_ANY);
  49. addr4->sin_port = htons(port);
  50. break;
  51. case AF_INET6:
  52. addr6 = (struct sockaddr_in6 *)addr;
  53. addr6->sin6_family = AF_INET6;
  54. addr6->sin6_addr = in6addr_any;
  55. addr6->sin6_port = htons(port);
  56. break;
  57. default:
  58. error(1, 0, "Unsupported family %d", family);
  59. }
  60. return (struct sockaddr *)addr;
  61. }
  62. static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
  63. {
  64. struct sockaddr *addr = new_any_sockaddr(family, port);
  65. struct sockaddr_in *addr4;
  66. struct sockaddr_in6 *addr6;
  67. switch (family) {
  68. case AF_INET:
  69. addr4 = (struct sockaddr_in *)addr;
  70. addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
  71. break;
  72. case AF_INET6:
  73. addr6 = (struct sockaddr_in6 *)addr;
  74. addr6->sin6_addr = in6addr_loopback;
  75. break;
  76. default:
  77. error(1, 0, "Unsupported family %d", family);
  78. }
  79. return addr;
  80. }
  81. static void attach_ebpf(int fd, uint16_t mod)
  82. {
  83. static char bpf_log_buf[65536];
  84. static const char bpf_license[] = "GPL";
  85. int bpf_fd;
  86. const struct bpf_insn prog[] = {
  87. /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
  88. { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
  89. /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
  90. { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
  91. /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
  92. { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
  93. /* BPF_EXIT_INSN() */
  94. { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
  95. };
  96. union bpf_attr attr;
  97. memset(&attr, 0, sizeof(attr));
  98. attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  99. attr.insn_cnt = ARRAY_SIZE(prog);
  100. attr.insns = (uint64_t)prog;
  101. attr.license = (uint64_t)bpf_license;
  102. attr.log_buf = (uint64_t)bpf_log_buf;
  103. attr.log_size = sizeof(bpf_log_buf);
  104. attr.log_level = 1;
  105. attr.kern_version = 0;
  106. bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  107. if (bpf_fd < 0)
  108. error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
  109. if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
  110. sizeof(bpf_fd)))
  111. error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
  112. close(bpf_fd);
  113. }
  114. static void attach_cbpf(int fd, uint16_t mod)
  115. {
  116. struct sock_filter code[] = {
  117. /* A = (uint32_t)skb[0] */
  118. { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
  119. /* A = A % mod */
  120. { BPF_ALU | BPF_MOD, 0, 0, mod },
  121. /* return A */
  122. { BPF_RET | BPF_A, 0, 0, 0 },
  123. };
  124. struct sock_fprog p = {
  125. .len = ARRAY_SIZE(code),
  126. .filter = code,
  127. };
  128. if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
  129. error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
  130. }
  131. static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
  132. void (*attach_bpf)(int, uint16_t))
  133. {
  134. struct sockaddr * const addr =
  135. new_any_sockaddr(p.recv_family, p.recv_port);
  136. int i, opt;
  137. for (i = 0; i < p.recv_socks; ++i) {
  138. fd[i] = socket(p.recv_family, p.protocol, 0);
  139. if (fd[i] < 0)
  140. error(1, errno, "failed to create recv %d", i);
  141. opt = 1;
  142. if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
  143. sizeof(opt)))
  144. error(1, errno, "failed to set SO_REUSEPORT on %d", i);
  145. if (i == 0)
  146. attach_bpf(fd[i], mod);
  147. if (bind(fd[i], addr, sockaddr_size()))
  148. error(1, errno, "failed to bind recv socket %d", i);
  149. if (p.protocol == SOCK_STREAM)
  150. if (listen(fd[i], p.recv_socks * 10))
  151. error(1, errno, "failed to listen on socket");
  152. }
  153. free(addr);
  154. }
  155. static void send_from(struct test_params p, uint16_t sport, char *buf,
  156. size_t len)
  157. {
  158. struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
  159. struct sockaddr * const daddr =
  160. new_loopback_sockaddr(p.send_family, p.recv_port);
  161. const int fd = socket(p.send_family, p.protocol, 0);
  162. if (fd < 0)
  163. error(1, errno, "failed to create send socket");
  164. if (bind(fd, saddr, sockaddr_size()))
  165. error(1, errno, "failed to bind send socket");
  166. if (connect(fd, daddr, sockaddr_size()))
  167. error(1, errno, "failed to connect");
  168. if (send(fd, buf, len, 0) < 0)
  169. error(1, errno, "failed to send message");
  170. close(fd);
  171. free(saddr);
  172. free(daddr);
  173. }
  174. static void test_recv_order(const struct test_params p, int fd[], int mod)
  175. {
  176. char recv_buf[8], send_buf[8];
  177. struct msghdr msg;
  178. struct iovec recv_io = { recv_buf, 8 };
  179. struct epoll_event ev;
  180. int epfd, conn, i, sport, expected;
  181. uint32_t data, ndata;
  182. epfd = epoll_create(1);
  183. if (epfd < 0)
  184. error(1, errno, "failed to create epoll");
  185. for (i = 0; i < p.recv_socks; ++i) {
  186. ev.events = EPOLLIN;
  187. ev.data.fd = fd[i];
  188. if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
  189. error(1, errno, "failed to register sock %d epoll", i);
  190. }
  191. memset(&msg, 0, sizeof(msg));
  192. msg.msg_iov = &recv_io;
  193. msg.msg_iovlen = 1;
  194. for (data = 0; data < p.recv_socks * 2; ++data) {
  195. sport = p.send_port_min + data;
  196. ndata = htonl(data);
  197. memcpy(send_buf, &ndata, sizeof(ndata));
  198. send_from(p, sport, send_buf, sizeof(ndata));
  199. i = epoll_wait(epfd, &ev, 1, -1);
  200. if (i < 0)
  201. error(1, errno, "epoll wait failed");
  202. if (p.protocol == SOCK_STREAM) {
  203. conn = accept(ev.data.fd, NULL, NULL);
  204. if (conn < 0)
  205. error(1, errno, "error accepting");
  206. i = recvmsg(conn, &msg, 0);
  207. close(conn);
  208. } else {
  209. i = recvmsg(ev.data.fd, &msg, 0);
  210. }
  211. if (i < 0)
  212. error(1, errno, "recvmsg error");
  213. if (i != sizeof(ndata))
  214. error(1, 0, "expected size %zd got %d",
  215. sizeof(ndata), i);
  216. for (i = 0; i < p.recv_socks; ++i)
  217. if (ev.data.fd == fd[i])
  218. break;
  219. memcpy(&ndata, recv_buf, sizeof(ndata));
  220. fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
  221. expected = (sport % mod);
  222. if (i != expected)
  223. error(1, 0, "expected socket %d", expected);
  224. }
  225. }
  226. static void test_reuseport_ebpf(const struct test_params p)
  227. {
  228. int i, fd[p.recv_socks];
  229. fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
  230. build_recv_group(p, fd, p.recv_socks, attach_ebpf);
  231. test_recv_order(p, fd, p.recv_socks);
  232. fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
  233. attach_ebpf(fd[0], p.recv_socks / 2);
  234. test_recv_order(p, fd, p.recv_socks / 2);
  235. for (i = 0; i < p.recv_socks; ++i)
  236. close(fd[i]);
  237. }
  238. static void test_reuseport_cbpf(const struct test_params p)
  239. {
  240. int i, fd[p.recv_socks];
  241. fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
  242. build_recv_group(p, fd, p.recv_socks, attach_cbpf);
  243. test_recv_order(p, fd, p.recv_socks);
  244. fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
  245. attach_cbpf(fd[0], p.recv_socks / 2);
  246. test_recv_order(p, fd, p.recv_socks / 2);
  247. for (i = 0; i < p.recv_socks; ++i)
  248. close(fd[i]);
  249. }
  250. static void test_extra_filter(const struct test_params p)
  251. {
  252. struct sockaddr * const addr =
  253. new_any_sockaddr(p.recv_family, p.recv_port);
  254. int fd1, fd2, opt;
  255. fprintf(stderr, "Testing too many filters...\n");
  256. fd1 = socket(p.recv_family, p.protocol, 0);
  257. if (fd1 < 0)
  258. error(1, errno, "failed to create socket 1");
  259. fd2 = socket(p.recv_family, p.protocol, 0);
  260. if (fd2 < 0)
  261. error(1, errno, "failed to create socket 2");
  262. opt = 1;
  263. if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
  264. error(1, errno, "failed to set SO_REUSEPORT on socket 1");
  265. if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
  266. error(1, errno, "failed to set SO_REUSEPORT on socket 2");
  267. attach_ebpf(fd1, 10);
  268. attach_ebpf(fd2, 10);
  269. if (bind(fd1, addr, sockaddr_size()))
  270. error(1, errno, "failed to bind recv socket 1");
  271. if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
  272. error(1, errno, "bind socket 2 should fail with EADDRINUSE");
  273. free(addr);
  274. }
  275. static void test_filter_no_reuseport(const struct test_params p)
  276. {
  277. struct sockaddr * const addr =
  278. new_any_sockaddr(p.recv_family, p.recv_port);
  279. const char bpf_license[] = "GPL";
  280. struct bpf_insn ecode[] = {
  281. { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
  282. { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
  283. };
  284. struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
  285. union bpf_attr eprog;
  286. struct sock_fprog cprog;
  287. int fd, bpf_fd;
  288. fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
  289. memset(&eprog, 0, sizeof(eprog));
  290. eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
  291. eprog.insn_cnt = ARRAY_SIZE(ecode);
  292. eprog.insns = (uint64_t)ecode;
  293. eprog.license = (uint64_t)bpf_license;
  294. eprog.kern_version = 0;
  295. memset(&cprog, 0, sizeof(cprog));
  296. cprog.len = ARRAY_SIZE(ccode);
  297. cprog.filter = ccode;
  298. bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
  299. if (bpf_fd < 0)
  300. error(1, errno, "ebpf error");
  301. fd = socket(p.recv_family, p.protocol, 0);
  302. if (fd < 0)
  303. error(1, errno, "failed to create socket 1");
  304. if (bind(fd, addr, sockaddr_size()))
  305. error(1, errno, "failed to bind recv socket 1");
  306. errno = 0;
  307. if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
  308. sizeof(bpf_fd)) || errno != EINVAL)
  309. error(1, errno, "setsockopt should have returned EINVAL");
  310. errno = 0;
  311. if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
  312. sizeof(cprog)) || errno != EINVAL)
  313. error(1, errno, "setsockopt should have returned EINVAL");
  314. free(addr);
  315. }
  316. static void test_filter_without_bind(void)
  317. {
  318. int fd1, fd2;
  319. fprintf(stderr, "Testing filter add without bind...\n");
  320. fd1 = socket(AF_INET, SOCK_DGRAM, 0);
  321. if (fd1 < 0)
  322. error(1, errno, "failed to create socket 1");
  323. fd2 = socket(AF_INET, SOCK_DGRAM, 0);
  324. if (fd2 < 0)
  325. error(1, errno, "failed to create socket 2");
  326. attach_ebpf(fd1, 10);
  327. attach_cbpf(fd2, 10);
  328. close(fd1);
  329. close(fd2);
  330. }
  331. int main(void)
  332. {
  333. fprintf(stderr, "---- IPv4 UDP ----\n");
  334. /* NOTE: UDP socket lookups traverse a different code path when there
  335. * are > 10 sockets in a group. Run the bpf test through both paths.
  336. */
  337. test_reuseport_ebpf((struct test_params) {
  338. .recv_family = AF_INET,
  339. .send_family = AF_INET,
  340. .protocol = SOCK_DGRAM,
  341. .recv_socks = 10,
  342. .recv_port = 8000,
  343. .send_port_min = 9000});
  344. test_reuseport_ebpf((struct test_params) {
  345. .recv_family = AF_INET,
  346. .send_family = AF_INET,
  347. .protocol = SOCK_DGRAM,
  348. .recv_socks = 20,
  349. .recv_port = 8000,
  350. .send_port_min = 9000});
  351. test_reuseport_cbpf((struct test_params) {
  352. .recv_family = AF_INET,
  353. .send_family = AF_INET,
  354. .protocol = SOCK_DGRAM,
  355. .recv_socks = 10,
  356. .recv_port = 8001,
  357. .send_port_min = 9020});
  358. test_reuseport_cbpf((struct test_params) {
  359. .recv_family = AF_INET,
  360. .send_family = AF_INET,
  361. .protocol = SOCK_DGRAM,
  362. .recv_socks = 20,
  363. .recv_port = 8001,
  364. .send_port_min = 9020});
  365. test_extra_filter((struct test_params) {
  366. .recv_family = AF_INET,
  367. .protocol = SOCK_DGRAM,
  368. .recv_port = 8002});
  369. test_filter_no_reuseport((struct test_params) {
  370. .recv_family = AF_INET,
  371. .protocol = SOCK_DGRAM,
  372. .recv_port = 8008});
  373. fprintf(stderr, "---- IPv6 UDP ----\n");
  374. test_reuseport_ebpf((struct test_params) {
  375. .recv_family = AF_INET6,
  376. .send_family = AF_INET6,
  377. .protocol = SOCK_DGRAM,
  378. .recv_socks = 10,
  379. .recv_port = 8003,
  380. .send_port_min = 9040});
  381. test_reuseport_ebpf((struct test_params) {
  382. .recv_family = AF_INET6,
  383. .send_family = AF_INET6,
  384. .protocol = SOCK_DGRAM,
  385. .recv_socks = 20,
  386. .recv_port = 8003,
  387. .send_port_min = 9040});
  388. test_reuseport_cbpf((struct test_params) {
  389. .recv_family = AF_INET6,
  390. .send_family = AF_INET6,
  391. .protocol = SOCK_DGRAM,
  392. .recv_socks = 10,
  393. .recv_port = 8004,
  394. .send_port_min = 9060});
  395. test_reuseport_cbpf((struct test_params) {
  396. .recv_family = AF_INET6,
  397. .send_family = AF_INET6,
  398. .protocol = SOCK_DGRAM,
  399. .recv_socks = 20,
  400. .recv_port = 8004,
  401. .send_port_min = 9060});
  402. test_extra_filter((struct test_params) {
  403. .recv_family = AF_INET6,
  404. .protocol = SOCK_DGRAM,
  405. .recv_port = 8005});
  406. test_filter_no_reuseport((struct test_params) {
  407. .recv_family = AF_INET6,
  408. .protocol = SOCK_DGRAM,
  409. .recv_port = 8009});
  410. fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
  411. test_reuseport_ebpf((struct test_params) {
  412. .recv_family = AF_INET6,
  413. .send_family = AF_INET,
  414. .protocol = SOCK_DGRAM,
  415. .recv_socks = 20,
  416. .recv_port = 8006,
  417. .send_port_min = 9080});
  418. test_reuseport_ebpf((struct test_params) {
  419. .recv_family = AF_INET6,
  420. .send_family = AF_INET,
  421. .protocol = SOCK_DGRAM,
  422. .recv_socks = 10,
  423. .recv_port = 8006,
  424. .send_port_min = 9080});
  425. test_reuseport_cbpf((struct test_params) {
  426. .recv_family = AF_INET6,
  427. .send_family = AF_INET,
  428. .protocol = SOCK_DGRAM,
  429. .recv_socks = 10,
  430. .recv_port = 8007,
  431. .send_port_min = 9100});
  432. test_reuseport_cbpf((struct test_params) {
  433. .recv_family = AF_INET6,
  434. .send_family = AF_INET,
  435. .protocol = SOCK_DGRAM,
  436. .recv_socks = 20,
  437. .recv_port = 8007,
  438. .send_port_min = 9100});
  439. test_filter_without_bind();
  440. fprintf(stderr, "SUCCESS\n");
  441. return 0;
  442. }