123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
// SPDX-License-Identifier: GPL-2.0
/*
 * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
 *
 * Test it with:
 *
 * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
 *
 * This exactly matches what is marshalled into the raw_syscalls:sys_enter
 * payload expected by the 'perf trace' beautifiers.
 *
 * For now it just uses the existing tracepoint augmentation code in 'perf
 * trace'; in the next csets we'll hook these up with the sys_enter/sys_exit
 * code that will combine entry/exit in a strace-like way.
 */
- #include <stdio.h>
- #include <linux/socket.h>
- /* bpf-output associated map */
- struct bpf_map SEC("maps") __augmented_syscalls__ = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = __NR_CPUS__,
- };
/*
 * Payload handed to the raw_syscalls:sys_enter handler: an opaque 64-bit
 * slot for the common tracepoint fields, the syscall number and the six
 * raw syscall arguments.
 */
struct syscall_enter_args {
	unsigned long long common_tp_fields;	/* not interpreted by this file */
	long		   syscall_nr;		/* compared against SYS_OPEN/SYS_OPENAT */
	unsigned long	   args[6];		/* raw arguments; pointers are read as integers */
};
/*
 * Payload handed to the raw_syscalls:sys_exit handler: an opaque 64-bit
 * slot for the common tracepoint fields, the syscall number and the
 * syscall's return value.
 */
struct syscall_exit_args {
	unsigned long long common_tp_fields;	/* not interpreted by this file */
	long		   syscall_nr;
	long		   ret;
};
/*
 * Pathname record appended after the syscall arguments in the payload
 * built by sys_enter(). 'size' receives the result of probe_read_str(),
 * 'reserved' is always written as 0 and 'value' holds the copied string.
 */
struct augmented_filename {
	unsigned int size;
	int	     reserved;
	char	     value[256];
};
/*
 * Syscall numbers whose pathname argument sys_enter() augments.
 * NOTE(review): 2 and 257 are the x86_64 numbers for open and openat;
 * confirm the target architecture before relying on them elsewhere.
 */
#define SYS_OPEN	  2
#define SYS_OPENAT	257
- SEC("raw_syscalls:sys_enter")
- int sys_enter(struct syscall_enter_args *args)
- {
- struct {
- struct syscall_enter_args args;
- struct augmented_filename filename;
- } augmented_args;
- unsigned int len = sizeof(augmented_args);
- const void *filename_arg = NULL;
- probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
- /*
- * Yonghong and Edward Cree sayz:
- *
- * https://www.spinics.net/lists/netdev/msg531645.html
- *
- * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
- * >> 10: (bf) r1 = r6
- * >> 11: (07) r1 += 16
- * >> 12: (05) goto pc+2
- * >> 15: (79) r3 = *(u64 *)(r1 +0)
- * >> dereference of modified ctx ptr R1 off=16 disallowed
- * > Aha, we at least got a different error message this time.
- * > And indeed llvm has done that optimisation, rather than the more obvious
- * > 11: r3 = *(u64 *)(r1 +16)
- * > because it wants to have lots of reads share a single insn. You may be able
- * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone
- * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
- * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯
- *
- * The optimization mostly likes below:
- *
- * br1:
- * ...
- * r1 += 16
- * goto merge
- * br2:
- * ...
- * r1 += 20
- * goto merge
- * merge:
- * *(u64 *)(r1 + 0)
- *
- * The compiler tries to merge common loads. There is no easy way to
- * stop this compiler optimization without turning off a lot of other
- * optimizations. The easiest way is to add barriers:
- *
- * __asm__ __volatile__("": : :"memory")
- *
- * after the ctx memory access to prevent their down stream merging.
- */
- switch (augmented_args.args.syscall_nr) {
- case SYS_OPEN: filename_arg = (const void *)args->args[0];
- __asm__ __volatile__("": : :"memory");
- break;
- case SYS_OPENAT: filename_arg = (const void *)args->args[1];
- break;
- }
- if (filename_arg != NULL) {
- augmented_args.filename.reserved = 0;
- augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
- sizeof(augmented_args.filename.value),
- filename_arg);
- if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
- len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
- len &= sizeof(augmented_args.filename.value) - 1;
- }
- } else {
- len = sizeof(augmented_args.args);
- }
- perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
- return 0;
- }
- SEC("raw_syscalls:sys_exit")
- int sys_exit(struct syscall_exit_args *args)
- {
- return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
- }
/* Declare the license of this BPF object through the license() macro. */
license(GPL);
|