bpf_trace.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822
  1. /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
  2. * Copyright (c) 2016 Facebook
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of version 2 of the GNU General Public
  6. * License as published by the Free Software Foundation.
  7. */
  8. #include <linux/kernel.h>
  9. #include <linux/types.h>
  10. #include <linux/slab.h>
  11. #include <linux/bpf.h>
  12. #include <linux/bpf_perf_event.h>
  13. #include <linux/filter.h>
  14. #include <linux/uaccess.h>
  15. #include <linux/ctype.h>
  16. #include "trace.h"
  17. u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
  18. /**
  19. * trace_call_bpf - invoke BPF program
  20. * @call: tracepoint event
  21. * @ctx: opaque context pointer
  22. *
  23. * kprobe handlers execute BPF programs via this helper.
  24. * Can be used from static tracepoints in the future.
  25. *
  26. * Return: BPF programs always return an integer which is interpreted by
  27. * kprobe handler as:
  28. * 0 - return from kprobe (event is filtered out)
  29. * 1 - store kprobe event into ring buffer
  30. * Other values are reserved and currently alias to 1
  31. */
  32. unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
  33. {
  34. unsigned int ret;
  35. if (in_nmi()) /* not supported yet */
  36. return 1;
  37. preempt_disable();
  38. if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
  39. /*
  40. * since some bpf program is already running on this cpu,
  41. * don't call into another bpf program (same or different)
  42. * and don't send kprobe event into ring-buffer,
  43. * so return zero here
  44. */
  45. ret = 0;
  46. goto out;
  47. }
  48. /*
  49. * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
  50. * to all call sites, we did a bpf_prog_array_valid() there to check
  51. * whether call->prog_array is empty or not, which is
  52. * a heurisitc to speed up execution.
  53. *
  54. * If bpf_prog_array_valid() fetched prog_array was
  55. * non-NULL, we go into trace_call_bpf() and do the actual
  56. * proper rcu_dereference() under RCU lock.
  57. * If it turns out that prog_array is NULL then, we bail out.
  58. * For the opposite, if the bpf_prog_array_valid() fetched pointer
  59. * was NULL, you'll skip the prog_array with the risk of missing
  60. * out of events when it was updated in between this and the
  61. * rcu_dereference() which is accepted risk.
  62. */
  63. ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
  64. out:
  65. __this_cpu_dec(bpf_prog_active);
  66. preempt_enable();
  67. return ret;
  68. }
  69. EXPORT_SYMBOL_GPL(trace_call_bpf);
  70. BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
  71. {
  72. int ret;
  73. ret = probe_kernel_read(dst, unsafe_ptr, size);
  74. if (unlikely(ret < 0))
  75. memset(dst, 0, size);
  76. return ret;
  77. }
  78. static const struct bpf_func_proto bpf_probe_read_proto = {
  79. .func = bpf_probe_read,
  80. .gpl_only = true,
  81. .ret_type = RET_INTEGER,
  82. .arg1_type = ARG_PTR_TO_UNINIT_MEM,
  83. .arg2_type = ARG_CONST_SIZE_OR_ZERO,
  84. .arg3_type = ARG_ANYTHING,
  85. };
  86. BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
  87. u32, size)
  88. {
  89. /*
  90. * Ensure we're in user context which is safe for the helper to
  91. * run. This helper has no business in a kthread.
  92. *
  93. * access_ok() should prevent writing to non-user memory, but in
  94. * some situations (nommu, temporary switch, etc) access_ok() does
  95. * not provide enough validation, hence the check on KERNEL_DS.
  96. */
  97. if (unlikely(in_interrupt() ||
  98. current->flags & (PF_KTHREAD | PF_EXITING)))
  99. return -EPERM;
  100. if (unlikely(uaccess_kernel()))
  101. return -EPERM;
  102. if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
  103. return -EPERM;
  104. return probe_kernel_write(unsafe_ptr, src, size);
  105. }
  106. static const struct bpf_func_proto bpf_probe_write_user_proto = {
  107. .func = bpf_probe_write_user,
  108. .gpl_only = true,
  109. .ret_type = RET_INTEGER,
  110. .arg1_type = ARG_ANYTHING,
  111. .arg2_type = ARG_PTR_TO_MEM,
  112. .arg3_type = ARG_CONST_SIZE,
  113. };
  114. static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
  115. {
  116. pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
  117. current->comm, task_pid_nr(current));
  118. return &bpf_probe_write_user_proto;
  119. }
  120. /*
  121. * Only limited trace_printk() conversion specifiers allowed:
  122. * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
  123. */
  124. BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
  125. u64, arg2, u64, arg3)
  126. {
  127. bool str_seen = false;
  128. int mod[3] = {};
  129. int fmt_cnt = 0;
  130. u64 unsafe_addr;
  131. char buf[64];
  132. int i;
  133. /*
  134. * bpf_check()->check_func_arg()->check_stack_boundary()
  135. * guarantees that fmt points to bpf program stack,
  136. * fmt_size bytes of it were initialized and fmt_size > 0
  137. */
  138. if (fmt[--fmt_size] != 0)
  139. return -EINVAL;
  140. /* check format string for allowed specifiers */
  141. for (i = 0; i < fmt_size; i++) {
  142. if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
  143. return -EINVAL;
  144. if (fmt[i] != '%')
  145. continue;
  146. if (fmt_cnt >= 3)
  147. return -EINVAL;
  148. /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
  149. i++;
  150. if (fmt[i] == 'l') {
  151. mod[fmt_cnt]++;
  152. i++;
  153. } else if (fmt[i] == 'p' || fmt[i] == 's') {
  154. mod[fmt_cnt]++;
  155. i++;
  156. if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
  157. return -EINVAL;
  158. fmt_cnt++;
  159. if (fmt[i - 1] == 's') {
  160. if (str_seen)
  161. /* allow only one '%s' per fmt string */
  162. return -EINVAL;
  163. str_seen = true;
  164. switch (fmt_cnt) {
  165. case 1:
  166. unsafe_addr = arg1;
  167. arg1 = (long) buf;
  168. break;
  169. case 2:
  170. unsafe_addr = arg2;
  171. arg2 = (long) buf;
  172. break;
  173. case 3:
  174. unsafe_addr = arg3;
  175. arg3 = (long) buf;
  176. break;
  177. }
  178. buf[0] = 0;
  179. strncpy_from_unsafe(buf,
  180. (void *) (long) unsafe_addr,
  181. sizeof(buf));
  182. }
  183. continue;
  184. }
  185. if (fmt[i] == 'l') {
  186. mod[fmt_cnt]++;
  187. i++;
  188. }
  189. if (fmt[i] != 'i' && fmt[i] != 'd' &&
  190. fmt[i] != 'u' && fmt[i] != 'x')
  191. return -EINVAL;
  192. fmt_cnt++;
  193. }
  194. /* Horrid workaround for getting va_list handling working with different
  195. * argument type combinations generically for 32 and 64 bit archs.
  196. */
  197. #define __BPF_TP_EMIT() __BPF_ARG3_TP()
  198. #define __BPF_TP(...) \
  199. __trace_printk(1 /* Fake ip will not be printed. */, \
  200. fmt, ##__VA_ARGS__)
  201. #define __BPF_ARG1_TP(...) \
  202. ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
  203. ? __BPF_TP(arg1, ##__VA_ARGS__) \
  204. : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
  205. ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
  206. : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
  207. #define __BPF_ARG2_TP(...) \
  208. ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
  209. ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
  210. : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
  211. ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
  212. : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
  213. #define __BPF_ARG3_TP(...) \
  214. ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
  215. ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
  216. : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
  217. ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
  218. : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
  219. return __BPF_TP_EMIT();
  220. }
  221. static const struct bpf_func_proto bpf_trace_printk_proto = {
  222. .func = bpf_trace_printk,
  223. .gpl_only = true,
  224. .ret_type = RET_INTEGER,
  225. .arg1_type = ARG_PTR_TO_MEM,
  226. .arg2_type = ARG_CONST_SIZE,
  227. };
  228. const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
  229. {
  230. /*
  231. * this program might be calling bpf_trace_printk,
  232. * so allocate per-cpu printk buffers
  233. */
  234. trace_printk_init_buffers();
  235. return &bpf_trace_printk_proto;
  236. }
  237. static __always_inline int
  238. get_map_perf_counter(struct bpf_map *map, u64 flags,
  239. u64 *value, u64 *enabled, u64 *running)
  240. {
  241. struct bpf_array *array = container_of(map, struct bpf_array, map);
  242. unsigned int cpu = smp_processor_id();
  243. u64 index = flags & BPF_F_INDEX_MASK;
  244. struct bpf_event_entry *ee;
  245. if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
  246. return -EINVAL;
  247. if (index == BPF_F_CURRENT_CPU)
  248. index = cpu;
  249. if (unlikely(index >= array->map.max_entries))
  250. return -E2BIG;
  251. ee = READ_ONCE(array->ptrs[index]);
  252. if (!ee)
  253. return -ENOENT;
  254. return perf_event_read_local(ee->event, value, enabled, running);
  255. }
  256. BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
  257. {
  258. u64 value = 0;
  259. int err;
  260. err = get_map_perf_counter(map, flags, &value, NULL, NULL);
  261. /*
  262. * this api is ugly since we miss [-22..-2] range of valid
  263. * counter values, but that's uapi
  264. */
  265. if (err)
  266. return err;
  267. return value;
  268. }
  269. static const struct bpf_func_proto bpf_perf_event_read_proto = {
  270. .func = bpf_perf_event_read,
  271. .gpl_only = true,
  272. .ret_type = RET_INTEGER,
  273. .arg1_type = ARG_CONST_MAP_PTR,
  274. .arg2_type = ARG_ANYTHING,
  275. };
  276. BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
  277. struct bpf_perf_event_value *, buf, u32, size)
  278. {
  279. int err = -EINVAL;
  280. if (unlikely(size != sizeof(struct bpf_perf_event_value)))
  281. goto clear;
  282. err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
  283. &buf->running);
  284. if (unlikely(err))
  285. goto clear;
  286. return 0;
  287. clear:
  288. memset(buf, 0, size);
  289. return err;
  290. }
  291. static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
  292. .func = bpf_perf_event_read_value,
  293. .gpl_only = true,
  294. .ret_type = RET_INTEGER,
  295. .arg1_type = ARG_CONST_MAP_PTR,
  296. .arg2_type = ARG_ANYTHING,
  297. .arg3_type = ARG_PTR_TO_UNINIT_MEM,
  298. .arg4_type = ARG_CONST_SIZE,
  299. };
  300. static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
  301. static __always_inline u64
  302. __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
  303. u64 flags, struct perf_raw_record *raw)
  304. {
  305. struct bpf_array *array = container_of(map, struct bpf_array, map);
  306. struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
  307. unsigned int cpu = smp_processor_id();
  308. u64 index = flags & BPF_F_INDEX_MASK;
  309. struct bpf_event_entry *ee;
  310. struct perf_event *event;
  311. if (index == BPF_F_CURRENT_CPU)
  312. index = cpu;
  313. if (unlikely(index >= array->map.max_entries))
  314. return -E2BIG;
  315. ee = READ_ONCE(array->ptrs[index]);
  316. if (!ee)
  317. return -ENOENT;
  318. event = ee->event;
  319. if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
  320. event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
  321. return -EINVAL;
  322. if (unlikely(event->oncpu != cpu))
  323. return -EOPNOTSUPP;
  324. perf_sample_data_init(sd, 0, 0);
  325. sd->raw = raw;
  326. perf_event_output(event, sd, regs);
  327. return 0;
  328. }
  329. BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
  330. u64, flags, void *, data, u64, size)
  331. {
  332. struct perf_raw_record raw = {
  333. .frag = {
  334. .size = size,
  335. .data = data,
  336. },
  337. };
  338. if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
  339. return -EINVAL;
  340. return __bpf_perf_event_output(regs, map, flags, &raw);
  341. }
  342. static const struct bpf_func_proto bpf_perf_event_output_proto = {
  343. .func = bpf_perf_event_output,
  344. .gpl_only = true,
  345. .ret_type = RET_INTEGER,
  346. .arg1_type = ARG_PTR_TO_CTX,
  347. .arg2_type = ARG_CONST_MAP_PTR,
  348. .arg3_type = ARG_ANYTHING,
  349. .arg4_type = ARG_PTR_TO_MEM,
  350. .arg5_type = ARG_CONST_SIZE_OR_ZERO,
  351. };
  352. static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
  353. u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
  354. void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
  355. {
  356. struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
  357. struct perf_raw_frag frag = {
  358. .copy = ctx_copy,
  359. .size = ctx_size,
  360. .data = ctx,
  361. };
  362. struct perf_raw_record raw = {
  363. .frag = {
  364. {
  365. .next = ctx_size ? &frag : NULL,
  366. },
  367. .size = meta_size,
  368. .data = meta,
  369. },
  370. };
  371. perf_fetch_caller_regs(regs);
  372. return __bpf_perf_event_output(regs, map, flags, &raw);
  373. }
  374. BPF_CALL_0(bpf_get_current_task)
  375. {
  376. return (long) current;
  377. }
  378. static const struct bpf_func_proto bpf_get_current_task_proto = {
  379. .func = bpf_get_current_task,
  380. .gpl_only = true,
  381. .ret_type = RET_INTEGER,
  382. };
  383. BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
  384. {
  385. struct bpf_array *array = container_of(map, struct bpf_array, map);
  386. struct cgroup *cgrp;
  387. if (unlikely(in_interrupt()))
  388. return -EINVAL;
  389. if (unlikely(idx >= array->map.max_entries))
  390. return -E2BIG;
  391. cgrp = READ_ONCE(array->ptrs[idx]);
  392. if (unlikely(!cgrp))
  393. return -EAGAIN;
  394. return task_under_cgroup_hierarchy(current, cgrp);
  395. }
  396. static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
  397. .func = bpf_current_task_under_cgroup,
  398. .gpl_only = false,
  399. .ret_type = RET_INTEGER,
  400. .arg1_type = ARG_CONST_MAP_PTR,
  401. .arg2_type = ARG_ANYTHING,
  402. };
  403. BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
  404. const void *, unsafe_ptr)
  405. {
  406. int ret;
  407. /*
  408. * The strncpy_from_unsafe() call will likely not fill the entire
  409. * buffer, but that's okay in this circumstance as we're probing
  410. * arbitrary memory anyway similar to bpf_probe_read() and might
  411. * as well probe the stack. Thus, memory is explicitly cleared
  412. * only in error case, so that improper users ignoring return
  413. * code altogether don't copy garbage; otherwise length of string
  414. * is returned that can be used for bpf_perf_event_output() et al.
  415. */
  416. ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
  417. if (unlikely(ret < 0))
  418. memset(dst, 0, size);
  419. return ret;
  420. }
  421. static const struct bpf_func_proto bpf_probe_read_str_proto = {
  422. .func = bpf_probe_read_str,
  423. .gpl_only = true,
  424. .ret_type = RET_INTEGER,
  425. .arg1_type = ARG_PTR_TO_UNINIT_MEM,
  426. .arg2_type = ARG_CONST_SIZE_OR_ZERO,
  427. .arg3_type = ARG_ANYTHING,
  428. };
  429. static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
  430. {
  431. switch (func_id) {
  432. case BPF_FUNC_map_lookup_elem:
  433. return &bpf_map_lookup_elem_proto;
  434. case BPF_FUNC_map_update_elem:
  435. return &bpf_map_update_elem_proto;
  436. case BPF_FUNC_map_delete_elem:
  437. return &bpf_map_delete_elem_proto;
  438. case BPF_FUNC_probe_read:
  439. return &bpf_probe_read_proto;
  440. case BPF_FUNC_ktime_get_ns:
  441. return &bpf_ktime_get_ns_proto;
  442. case BPF_FUNC_tail_call:
  443. return &bpf_tail_call_proto;
  444. case BPF_FUNC_get_current_pid_tgid:
  445. return &bpf_get_current_pid_tgid_proto;
  446. case BPF_FUNC_get_current_task:
  447. return &bpf_get_current_task_proto;
  448. case BPF_FUNC_get_current_uid_gid:
  449. return &bpf_get_current_uid_gid_proto;
  450. case BPF_FUNC_get_current_comm:
  451. return &bpf_get_current_comm_proto;
  452. case BPF_FUNC_trace_printk:
  453. return bpf_get_trace_printk_proto();
  454. case BPF_FUNC_get_smp_processor_id:
  455. return &bpf_get_smp_processor_id_proto;
  456. case BPF_FUNC_get_numa_node_id:
  457. return &bpf_get_numa_node_id_proto;
  458. case BPF_FUNC_perf_event_read:
  459. return &bpf_perf_event_read_proto;
  460. case BPF_FUNC_probe_write_user:
  461. return bpf_get_probe_write_proto();
  462. case BPF_FUNC_current_task_under_cgroup:
  463. return &bpf_current_task_under_cgroup_proto;
  464. case BPF_FUNC_get_prandom_u32:
  465. return &bpf_get_prandom_u32_proto;
  466. case BPF_FUNC_probe_read_str:
  467. return &bpf_probe_read_str_proto;
  468. default:
  469. return NULL;
  470. }
  471. }
  472. static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
  473. {
  474. switch (func_id) {
  475. case BPF_FUNC_perf_event_output:
  476. return &bpf_perf_event_output_proto;
  477. case BPF_FUNC_get_stackid:
  478. return &bpf_get_stackid_proto;
  479. case BPF_FUNC_perf_event_read_value:
  480. return &bpf_perf_event_read_value_proto;
  481. default:
  482. return tracing_func_proto(func_id);
  483. }
  484. }
  485. /* bpf+kprobe programs can access fields of 'struct pt_regs' */
  486. static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
  487. struct bpf_insn_access_aux *info)
  488. {
  489. if (off < 0 || off >= sizeof(struct pt_regs))
  490. return false;
  491. if (type != BPF_READ)
  492. return false;
  493. if (off % size != 0)
  494. return false;
  495. /*
  496. * Assertion for 32 bit to make sure last 8 byte access
  497. * (BPF_DW) to the last 4 byte member is disallowed.
  498. */
  499. if (off + size > sizeof(struct pt_regs))
  500. return false;
  501. return true;
  502. }
  503. const struct bpf_verifier_ops kprobe_verifier_ops = {
  504. .get_func_proto = kprobe_prog_func_proto,
  505. .is_valid_access = kprobe_prog_is_valid_access,
  506. };
  507. const struct bpf_prog_ops kprobe_prog_ops = {
  508. };
  509. BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
  510. u64, flags, void *, data, u64, size)
  511. {
  512. struct pt_regs *regs = *(struct pt_regs **)tp_buff;
  513. /*
  514. * r1 points to perf tracepoint buffer where first 8 bytes are hidden
  515. * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
  516. * from there and call the same bpf_perf_event_output() helper inline.
  517. */
  518. return ____bpf_perf_event_output(regs, map, flags, data, size);
  519. }
  520. static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
  521. .func = bpf_perf_event_output_tp,
  522. .gpl_only = true,
  523. .ret_type = RET_INTEGER,
  524. .arg1_type = ARG_PTR_TO_CTX,
  525. .arg2_type = ARG_CONST_MAP_PTR,
  526. .arg3_type = ARG_ANYTHING,
  527. .arg4_type = ARG_PTR_TO_MEM,
  528. .arg5_type = ARG_CONST_SIZE_OR_ZERO,
  529. };
  530. BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
  531. u64, flags)
  532. {
  533. struct pt_regs *regs = *(struct pt_regs **)tp_buff;
  534. /*
  535. * Same comment as in bpf_perf_event_output_tp(), only that this time
  536. * the other helper's function body cannot be inlined due to being
  537. * external, thus we need to call raw helper function.
  538. */
  539. return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
  540. flags, 0, 0);
  541. }
  542. static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
  543. .func = bpf_get_stackid_tp,
  544. .gpl_only = true,
  545. .ret_type = RET_INTEGER,
  546. .arg1_type = ARG_PTR_TO_CTX,
  547. .arg2_type = ARG_CONST_MAP_PTR,
  548. .arg3_type = ARG_ANYTHING,
  549. };
  550. BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
  551. struct bpf_perf_event_value *, buf, u32, size)
  552. {
  553. int err = -EINVAL;
  554. if (unlikely(size != sizeof(struct bpf_perf_event_value)))
  555. goto clear;
  556. err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
  557. &buf->running);
  558. if (unlikely(err))
  559. goto clear;
  560. return 0;
  561. clear:
  562. memset(buf, 0, size);
  563. return err;
  564. }
  565. static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
  566. .func = bpf_perf_prog_read_value_tp,
  567. .gpl_only = true,
  568. .ret_type = RET_INTEGER,
  569. .arg1_type = ARG_PTR_TO_CTX,
  570. .arg2_type = ARG_PTR_TO_UNINIT_MEM,
  571. .arg3_type = ARG_CONST_SIZE,
  572. };
  573. static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
  574. {
  575. switch (func_id) {
  576. case BPF_FUNC_perf_event_output:
  577. return &bpf_perf_event_output_proto_tp;
  578. case BPF_FUNC_get_stackid:
  579. return &bpf_get_stackid_proto_tp;
  580. case BPF_FUNC_perf_prog_read_value:
  581. return &bpf_perf_prog_read_value_proto_tp;
  582. default:
  583. return tracing_func_proto(func_id);
  584. }
  585. }
  586. static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
  587. struct bpf_insn_access_aux *info)
  588. {
  589. if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
  590. return false;
  591. if (type != BPF_READ)
  592. return false;
  593. if (off % size != 0)
  594. return false;
  595. BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
  596. return true;
  597. }
  598. const struct bpf_verifier_ops tracepoint_verifier_ops = {
  599. .get_func_proto = tp_prog_func_proto,
  600. .is_valid_access = tp_prog_is_valid_access,
  601. };
  602. const struct bpf_prog_ops tracepoint_prog_ops = {
  603. };
  604. static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
  605. struct bpf_insn_access_aux *info)
  606. {
  607. const int size_sp = FIELD_SIZEOF(struct bpf_perf_event_data,
  608. sample_period);
  609. if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
  610. return false;
  611. if (type != BPF_READ)
  612. return false;
  613. if (off % size != 0)
  614. return false;
  615. switch (off) {
  616. case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
  617. bpf_ctx_record_field_size(info, size_sp);
  618. if (!bpf_ctx_narrow_access_ok(off, size, size_sp))
  619. return false;
  620. break;
  621. default:
  622. if (size != sizeof(long))
  623. return false;
  624. }
  625. return true;
  626. }
  627. static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
  628. const struct bpf_insn *si,
  629. struct bpf_insn *insn_buf,
  630. struct bpf_prog *prog, u32 *target_size)
  631. {
  632. struct bpf_insn *insn = insn_buf;
  633. switch (si->off) {
  634. case offsetof(struct bpf_perf_event_data, sample_period):
  635. *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
  636. data), si->dst_reg, si->src_reg,
  637. offsetof(struct bpf_perf_event_data_kern, data));
  638. *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
  639. bpf_target_off(struct perf_sample_data, period, 8,
  640. target_size));
  641. break;
  642. default:
  643. *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
  644. regs), si->dst_reg, si->src_reg,
  645. offsetof(struct bpf_perf_event_data_kern, regs));
  646. *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
  647. si->off);
  648. break;
  649. }
  650. return insn - insn_buf;
  651. }
  652. const struct bpf_verifier_ops perf_event_verifier_ops = {
  653. .get_func_proto = tp_prog_func_proto,
  654. .is_valid_access = pe_prog_is_valid_access,
  655. .convert_ctx_access = pe_prog_convert_ctx_access,
  656. };
  657. const struct bpf_prog_ops perf_event_prog_ops = {
  658. };
  659. static DEFINE_MUTEX(bpf_event_mutex);
  660. #define BPF_TRACE_MAX_PROGS 64
  661. int perf_event_attach_bpf_prog(struct perf_event *event,
  662. struct bpf_prog *prog)
  663. {
  664. struct bpf_prog_array __rcu *old_array;
  665. struct bpf_prog_array *new_array;
  666. int ret = -EEXIST;
  667. mutex_lock(&bpf_event_mutex);
  668. if (event->prog)
  669. goto unlock;
  670. old_array = event->tp_event->prog_array;
  671. if (old_array &&
  672. bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
  673. ret = -E2BIG;
  674. goto unlock;
  675. }
  676. ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
  677. if (ret < 0)
  678. goto unlock;
  679. /* set the new array to event->tp_event and set event->prog */
  680. event->prog = prog;
  681. rcu_assign_pointer(event->tp_event->prog_array, new_array);
  682. bpf_prog_array_free(old_array);
  683. unlock:
  684. mutex_unlock(&bpf_event_mutex);
  685. return ret;
  686. }
  687. void perf_event_detach_bpf_prog(struct perf_event *event)
  688. {
  689. struct bpf_prog_array __rcu *old_array;
  690. struct bpf_prog_array *new_array;
  691. int ret;
  692. mutex_lock(&bpf_event_mutex);
  693. if (!event->prog)
  694. goto unlock;
  695. old_array = event->tp_event->prog_array;
  696. ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
  697. if (ret < 0) {
  698. bpf_prog_array_delete_safe(old_array, event->prog);
  699. } else {
  700. rcu_assign_pointer(event->tp_event->prog_array, new_array);
  701. bpf_prog_array_free(old_array);
  702. }
  703. bpf_prog_put(event->prog);
  704. event->prog = NULL;
  705. unlock:
  706. mutex_unlock(&bpf_event_mutex);
  707. }