
Merge branch 'bpf_get_stack'

Yonghong Song says:

====================
Currently, the stackmap and the bpf_get_stackid helper are provided
for bpf programs to get stack traces. This approach has
a limitation though: if two stack traces have the same hash,
only one gets stored in the stackmap table, regardless of
whether BPF_F_REUSE_STACKID is specified or not,
so some stack traces may be missing from the user's perspective.
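
For illustration (not part of the patch), the collision case looks
like this:

	/* illustrative: traces A and B hash to the same stackmap bucket.
	 * id = bpf_get_stackid(ctx, &stackmap, 0);
	 *   trace A arrives first and is stored under id;
	 *   trace B then gets -EEXIST (without BPF_F_REUSE_STACKID)
	 *   or evicts A (with BPF_F_REUSE_STACKID);
	 *   either way, only one of the two traces survives.
	 */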

This patch set implements a new helper, bpf_get_stack, which
sends stack traces directly to the bpf program. The bpf program
is then able to see all stack traces, and can do in-kernel
processing or send stack traces to user space through a
shared map or bpf_perf_event_output.
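
As a sketch (not part of the patch; the program name and attach
point are only an example), the new usage pattern from a tracing
program looks like:

	#include <linux/bpf.h>
	#include "bpf_helpers.h"

	SEC("tracepoint/raw_syscalls/sys_enter")
	int sample_prog(void *ctx)
	{
		__u64 ips[16];
		int n;

		/* copy up to 16 kernel frames straight into our buffer */
		n = bpf_get_stack(ctx, ips, sizeof(ips), 0);
		if (n < 0)
			return 0;

		/* n bytes of instruction pointers are now visible to
		 * the program itself, e.g. for in-kernel filtering
		 * before pushing them out via bpf_perf_event_output()
		 */
		return 0;
	}

	char _license[] SEC("license") = "GPL";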

Patches #1 and #2 implement the core kernel support.
Patch #3 removes two never-hit branches in the verifier.
Patches #4 and #5 are two verifier improvements that make
bpf programming easier. Patch #6 syncs the new helper
to the tools headers. Patch #7 moves the perf_event polling code
and ksym lookup code from samples/bpf to
tools/testing/selftests/bpf. Patch #8 adds a verifier
test in tools/bpf for the new verifier change.
Patches #9 and #10 add tests for the raw tracepoint prog
and the tracepoint prog, respectively.

Changelogs:
  v8 -> v9:
    . make function perf_event_mmap (in trace_helpers.c) extern
      to decouple perf_event_mmap and perf_event_poller.
    . add jit-enabled handling for kernel stack verification
      in Patch #9. Since we do not have a good way to
      verify the kernel stack when jit is enabled, just treat
      a non-empty kernel stack as valid.
    . in Patch #9, use raw_syscalls/sys_enter instead of
      sched/sched_switch, and remove the call to cmd
      "taskset 1 dd if=/dev/zero of=/dev/null", which left
      a dangling process behind after the program exited.
  v7 -> v8:
    . rebase on top of latest bpf-next
    . simplify BPF_ARSH dst_reg->smin_val/smax_value tracking
    . rewrite the description of bpf_get_stack() in uapi bpf.h
      based on new format.
  v6 -> v7:
    . do perf callchain buffer allocation inside the
      verifier, so that if prog->has_callchain_buf is set,
      it is guaranteed that the buffer has been allocated.
    . change the condition "trace_nr <= skip" to "trace_nr < skip"
      so that a zero-size buffer returns 0 instead of -EFAULT.
  v5 -> v6:
    . after refining the return register's smax_value and umax_value
      for the helpers bpf_get_stack and bpf_probe_read_str,
      the bounds and var_off of the return register are further refined.
    . added missing commit message for tools header sync commit.
    . removed one unnecessary empty line.
  v4 -> v5:
    . relied on dst_reg->var_off to refine umin_val/umax_val
      in the verifier's handling of BPF_ARSH value range tracking,
      as suggested by Edward.
  v3 -> v4:
    . fixed a bug where the meta ptr is set to NULL in check_func_arg.
    . introduced tnum_arshift and added detailed comments on
      the underlying implementation.
    . avoided using VLA in tools/bpf test_progs.
  v2 -> v3:
    . used meta to track helper memory size argument
    . implemented range checking for ARSH in verifier
    . moved perf event polling and ksym related functions
      from samples/bpf to tools/bpf
    . added test to compare build id's between bpf_get_stackid
      and bpf_get_stack
  v1 -> v2:
    . fixed compilation error when CONFIG_PERF_EVENTS is not enabled
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
commit f60ad0a0c4

+ 1 - 0
include/linux/bpf.h

@@ -692,6 +692,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
 extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 
 /* Shared helpers among cBPF and eBPF. */

+ 2 - 1
include/linux/filter.h

@@ -468,7 +468,8 @@ struct bpf_prog {
 				dst_needed:1,	/* Do we need dst entry? */
 				blinded:1,	/* Was blinded */
 				is_func:1,	/* program is a bpf function */
-				kprobe_override:1; /* Do we override a kprobe? */
+				kprobe_override:1, /* Do we override a kprobe? */
+				has_callchain_buf:1; /* callchain buffer allocated? */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */

+ 3 - 1
include/linux/tnum.h

@@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max);
 /* Arithmetic and logical ops */
 /* Shift a tnum left (by a fixed shift) */
 struct tnum tnum_lshift(struct tnum a, u8 shift);
-/* Shift a tnum right (by a fixed shift) */
+/* Shift (rsh) a tnum right (by a fixed shift) */
 struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Shift (arsh) a tnum right (by a fixed min_shift) */
+struct tnum tnum_arshift(struct tnum a, u8 min_shift);
 /* Add two tnums, return @a + @b */
 struct tnum tnum_add(struct tnum a, struct tnum b);
 /* Subtract two tnums, return @a - @b */

+ 40 - 2
include/uapi/linux/bpf.h

@@ -1767,6 +1767,40 @@ union bpf_attr {
  * 		**CONFIG_XFRM** configuration option.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * 	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *ctx*, which is a pointer
+ *		to the context on which the tracing program is executed.
+ *		To store the stacktrace, the bpf program provides *buf* with
+ *		a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *	::
+ *
+ *		# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * 	Return
+ * 		a non-negative value equal to or less than size on success, or
+ * 		a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1835,7 +1869,8 @@ union bpf_attr {
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -1869,11 +1904,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
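
As a usage note (not part of the patch): the skip count occupies the
low byte of *flags* and composes with the mode bits, e.g.:

	/* illustrative: skip the first 3 frames, collect user build ids */
	__u64 flags = 3 | BPF_F_USER_STACK | BPF_F_USER_BUILD_ID;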

+ 5 - 0
kernel/bpf/core.c

@@ -31,6 +31,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/kallsyms.h>
 #include <linux/rcupdate.h>
+#include <linux/perf_event.h>
 
 #include <asm/unaligned.h>
 
@@ -1722,6 +1723,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 	aux = container_of(work, struct bpf_prog_aux, work);
 	if (bpf_prog_is_dev_bound(aux))
 		bpf_prog_offload_destroy(aux->prog);
+#ifdef CONFIG_PERF_EVENTS
+	if (aux->prog->has_callchain_buf)
+		put_callchain_buffers();
+#endif
 	for (i = 0; i < aux->func_cnt; i++)
 		bpf_jit_free(aux->func[i]);
 	if (aux->func_cnt) {

+ 72 - 8
kernel/bpf/stackmap.c

@@ -262,16 +262,11 @@ out:
 	return ret;
 }
 
-static void stack_map_get_build_id_offset(struct bpf_map *map,
-					  struct stack_map_bucket *bucket,
+static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 					  u64 *ips, u32 trace_nr, bool user)
 {
 	int i;
 	struct vm_area_struct *vma;
-	struct bpf_stack_build_id *id_offs;
-
-	bucket->nr = trace_nr;
-	id_offs = (struct bpf_stack_build_id *)bucket->data;
 
 	/*
 	 * We cannot do up_read() in nmi context, so build_id lookup is
@@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 			pcpu_freelist_pop(&smap->freelist);
 		if (unlikely(!new_bucket))
 			return -ENOMEM;
-		stack_map_get_build_id_offset(map, new_bucket, ips,
-					      trace_nr, user);
+		new_bucket->nr = trace_nr;
+		stack_map_get_build_id_offset(
+			(struct bpf_stack_build_id *)new_bucket->data,
+			ips, trace_nr, user);
 		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
 		if (hash_matches && bucket->nr == trace_nr &&
 		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
+	bool user = flags & BPF_F_USER_STACK;
+	struct perf_callchain_entry *trace;
+	bool kernel = !user;
+	int err = -EINVAL;
+	u64 *ips;
+
+	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+			       BPF_F_USER_BUILD_ID)))
+		goto clear;
+	if (kernel && user_build_id)
+		goto clear;
+
+	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
+					    : sizeof(u64);
+	if (unlikely(size % elem_size))
+		goto clear;
+
+	num_elem = size / elem_size;
+	if (sysctl_perf_event_max_stack < num_elem)
+		init_nr = 0;
+	else
+		init_nr = sysctl_perf_event_max_stack - num_elem;
+	trace = get_perf_callchain(regs, init_nr, kernel, user,
+				   sysctl_perf_event_max_stack, false, false);
+	if (unlikely(!trace))
+		goto err_fault;
+
+	trace_nr = trace->nr - init_nr;
+	if (trace_nr < skip)
+		goto err_fault;
+
+	trace_nr -= skip;
+	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
+	copy_len = trace_nr * elem_size;
+	ips = trace->ip + skip + init_nr;
+	if (user && user_build_id)
+		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
+	else
+		memcpy(buf, ips, copy_len);
+
+	if (size > copy_len)
+		memset(buf + copy_len, 0, size - copy_len);
+	return copy_len;
+
+err_fault:
+	err = -EFAULT;
+clear:
+	memset(buf, 0, size);
+	return err;
+}
+
+const struct bpf_func_proto bpf_get_stack_proto = {
+	.func		= bpf_get_stack,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
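
To make the index arithmetic in bpf_get_stack() concrete, a worked
example with illustrative numbers (assume sysctl_perf_event_max_stack
== 127):

	/* caller passes size = 128 bytes, no build ids, skip = 2 */
	elem_size = 8;           /* sizeof(u64)                        */
	num_elem  = 128 / 8;     /* 16 slots in the caller's buffer    */
	init_nr   = 127 - 16;    /* 111: ask get_perf_callchain() to
	                          * record only the last 16 frames     */
	/* suppose 10 frames get recorded, i.e. trace->nr == 121 */
	trace_nr  = 121 - 111;   /* 10 frames captured                 */
	trace_nr -= 2;           /* 8 frames left after skip           */
	copy_len  = 8 * 8;       /* 64 bytes copied from
	                          * ips = trace->ip + 2 + 111; the
	                          * remaining 64 bytes of buf are
	                          * zeroed, and 64 is returned         */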

+ 10 - 0
kernel/bpf/tnum.c

@@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
 	return TNUM(a.value >> shift, a.mask >> shift);
 }
 
+struct tnum tnum_arshift(struct tnum a, u8 min_shift)
+{
+	/* if a.value is negative, arithmetic shifting by minimum shift
+	 * will have larger negative offset compared to more shifting.
+	 * If a.value is nonnegative, arithmetic shifting by minimum shift
+	 * will have larger positive offset compare to more shifting.
+	 */
+	return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
+}
+
 struct tnum tnum_add(struct tnum a, struct tnum b)
 {
 	u64 sm, sv, sigma, chi, mu;
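
A worked example for tnum_arshift() (values illustrative): take a tnum
whose upper 32 bits are known to be 1 (a negative value) and whose
lower 32 bits are unknown:

	/* a = { .value = 0xffffffff00000000ULL,
	 *       .mask  = 0x00000000ffffffffULL }
	 * tnum_arshift(a, 32):
	 *   value: (s64)0xffffffff00000000 >> 32 = 0xffffffffffffffff
	 *   mask:  (s64)0x00000000ffffffff >> 32 = 0
	 * result: { -1, 0 }, i.e. the known sign bit was replicated
	 * across the register and the unknown low bits were shifted
	 * out entirely, leaving a fully known constant.
	 */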

+ 71 - 9
kernel/bpf/verifier.c

@@ -22,6 +22,7 @@
 #include <linux/stringify.h>
 #include <linux/bsearch.h>
 #include <linux/sort.h>
+#include <linux/perf_event.h>
 
 #include "disasm.h"
 
@@ -164,6 +165,8 @@ struct bpf_call_arg_meta {
 	bool pkt_access;
 	int regno;
 	int access_size;
+	s64 msize_smax_value;
+	u64 msize_umax_value;
 };
 
 static DEFINE_MUTEX(bpf_verifier_lock);
@@ -1984,6 +1987,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 	} else if (arg_type_is_mem_size(arg_type)) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
+		/* remember the mem_size which may be used later
+		 * to refine return values.
+		 */
+		meta->msize_smax_value = reg->smax_value;
+		meta->msize_umax_value = reg->umax_value;
+
 		/* The register is SCALAR_VALUE; the access check
 		 * happens using its boundaries.
 		 */
@@ -2323,6 +2332,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	return 0;
 }
 
+static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
+				   int func_id,
+				   struct bpf_call_arg_meta *meta)
+{
+	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
+
+	if (ret_type != RET_INTEGER ||
+	    (func_id != BPF_FUNC_get_stack &&
+	     func_id != BPF_FUNC_probe_read_str))
+		return;
+
+	ret_reg->smax_value = meta->msize_smax_value;
+	ret_reg->umax_value = meta->msize_umax_value;
+	__reg_deduce_bounds(ret_reg);
+	__reg_bound_offset(ret_reg);
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 {
 	const struct bpf_func_proto *fn = NULL;
@@ -2446,10 +2472,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return -EINVAL;
 	}
 
+	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
+
 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
 	if (err)
 		return err;
 
+	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
+		const char *err_str;
+
+#ifdef CONFIG_PERF_EVENTS
+		err = get_callchain_buffers(sysctl_perf_event_max_stack);
+		err_str = "cannot get callchain buffer for func %s#%d\n";
+#else
+		err = -ENOTSUPP;
+		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
+#endif
+		if (err) {
+			verbose(env, err_str, func_id_name(func_id), func_id);
+			return err;
+		}
+
+		env->prog->has_callchain_buf = true;
+	}
+
 	if (changes_data)
 		clear_all_pkt_pointers(env);
 	return 0;
@@ -2894,10 +2940,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->umin_value <<= umin_val;
 			dst_reg->umax_value <<= umax_val;
 		}
-		if (src_known)
-			dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
-		else
-			dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
+		dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
 		/* We may learn something more from the var_off */
 		__update_reg_bounds(dst_reg);
 		break;
@@ -2925,16 +2968,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		 */
 		dst_reg->smin_value = S64_MIN;
 		dst_reg->smax_value = S64_MAX;
-		if (src_known)
-			dst_reg->var_off = tnum_rshift(dst_reg->var_off,
-						       umin_val);
-		else
-			dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
+		dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
 		dst_reg->umin_value >>= umax_val;
 		dst_reg->umax_value >>= umin_val;
 		/* We may learn something more from the var_off */
 		__update_reg_bounds(dst_reg);
 		break;
+	case BPF_ARSH:
+		if (umax_val >= insn_bitness) {
+			/* Shifts greater than 31 or 63 are undefined.
+			 * This includes shifts by a negative number.
+			 */
+			mark_reg_unknown(env, regs, insn->dst_reg);
+			break;
+		}
+
+		/* Upon reaching here, src_known is true and
+		 * umax_val is equal to umin_val.
+		 */
+		dst_reg->smin_value >>= umin_val;
+		dst_reg->smax_value >>= umin_val;
+		dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
+
+		/* blow away the dst_reg umin_value/umax_value and rely on
+		 * dst_reg var_off to refine the result.
+		 */
+		dst_reg->umin_value = 0;
+		dst_reg->umax_value = U64_MAX;
+		__update_reg_bounds(dst_reg);
+		break;
 	default:
 		mark_reg_unknown(env, regs, insn->dst_reg);
 		break;
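
For context (not part of the patch), the pattern this ARSH tracking
has to handle is the 32-bit sign-extension idiom compilers emit for
the helper's int return value, as exercised by the verifier test
added below:

	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),    /* r8 = bpf_get_stack() retval */
	BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),  /* r8 <<= 32                   */
	BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), /* r8 s>>= 32: sign-extend     */
	/* do_refine_retval_range() bounds r0 by the buffer size, and the
	 * ARSH tracking above preserves enough of that bound through the
	 * shift pair for r8 to be usable as a verified buffer offset
	 * (see the "bpf_get_stack return R0 within range" test below).
	 */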

+ 49 - 1
kernel/trace/bpf_trace.c

@@ -20,6 +20,7 @@
 #include "trace.h"
 
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 /**
  * trace_call_bpf - invoke BPF program
@@ -577,6 +578,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto;
 	case BPF_FUNC_perf_event_read_value:
 		return &bpf_perf_event_read_value_proto;
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
@@ -664,6 +667,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
+	   u64, flags)
+{
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
+	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			     (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_tp = {
+	.func		= bpf_get_stack_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -672,6 +694,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_tp;
 	default:
 		return tracing_func_proto(func_id, prog);
 	}
@@ -734,6 +758,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_tp;
 	case BPF_FUNC_perf_prog_read_value:
 		return &bpf_perf_prog_read_value_proto;
 	default:
@@ -744,7 +770,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 /*
  * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
  * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output and/or bpf_get_stack_id
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
  */
 static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
@@ -787,6 +813,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
+	   void *, buf, u32, size, u64, flags)
+{
+	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+
+	perf_fetch_caller_regs(regs);
+	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+			     (unsigned long) size, flags, 0);
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
+	.func		= bpf_get_stack_raw_tp,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -795,6 +841,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_output_proto_raw_tp;
 	case BPF_FUNC_get_stackid:
 		return &bpf_get_stackid_proto_raw_tp;
+	case BPF_FUNC_get_stack:
+		return &bpf_get_stack_proto_raw_tp;
 	default:
 		return tracing_func_proto(func_id, prog);
 	}

+ 6 - 5
samples/bpf/Makefile

@@ -49,6 +49,7 @@ hostprogs-y += xdp_adjust_tail
 # Libbpf dependencies
 LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
 CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
+TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
 
 test_lru_dist-objs := test_lru_dist.o $(LIBBPF)
 sock_example-objs := sock_example.o $(LIBBPF)
@@ -65,10 +66,10 @@ tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o
 tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
 load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
 test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
-trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
+trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o $(TRACE_HELPERS)
 lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
-offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
-spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
+offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS)
+spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS)
 map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o
 test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o
 test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o
@@ -82,8 +83,8 @@ xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o
 xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o
 test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \
 				       test_current_task_under_cgroup_user.o
-trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o
-sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o
+trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS)
+sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS)
 tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o
 lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o
 xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o

+ 0 - 63
samples/bpf/bpf_load.c

@@ -648,66 +648,3 @@ void read_trace_pipe(void)
 		}
 	}
 }
-
-#define MAX_SYMS 300000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
-
-static int ksym_cmp(const void *p1, const void *p2)
-{
-	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-int load_kallsyms(void)
-{
-	FILE *f = fopen("/proc/kallsyms", "r");
-	char func[256], buf[256];
-	char symbol;
-	void *addr;
-	int i = 0;
-
-	if (!f)
-		return -ENOENT;
-
-	while (!feof(f)) {
-		if (!fgets(buf, sizeof(buf), f))
-			break;
-		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
-			break;
-		if (!addr)
-			continue;
-		syms[i].addr = (long) addr;
-		syms[i].name = strdup(func);
-		i++;
-	}
-	sym_cnt = i;
-	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
-	return 0;
-}
-
-struct ksym *ksym_search(long key)
-{
-	int start = 0, end = sym_cnt;
-	int result;
-
-	while (start < end) {
-		size_t mid = start + (end - start) / 2;
-
-		result = key - syms[mid].addr;
-		if (result < 0)
-			end = mid;
-		else if (result > 0)
-			start = mid + 1;
-		else
-			return &syms[mid];
-	}
-
-	if (start >= 1 && syms[start - 1].addr < key &&
-	    key < syms[start].addr)
-		/* valid ksym */
-		return &syms[start - 1];
-
-	/* out of range. return _stext */
-	return &syms[0];
-}
-

+ 0 - 7
samples/bpf/bpf_load.h

@@ -54,12 +54,5 @@ int load_bpf_file(char *path);
 int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
 
 void read_trace_pipe(void);
-struct ksym {
-	long addr;
-	char *name;
-};
-
-int load_kallsyms(void);
-struct ksym *ksym_search(long key);
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 #endif

+ 1 - 0
samples/bpf/offwaketime_user.c

@@ -17,6 +17,7 @@
 #include <sys/resource.h>
 #include "libbpf.h"
 #include "bpf_load.h"
+#include "trace_helpers.h"
 
 #define PRINT_RAW_ADDR 0
 

+ 1 - 0
samples/bpf/sampleip_user.c

@@ -22,6 +22,7 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
+#include "trace_helpers.h"
 
 #define DEFAULT_FREQ	99
 #define DEFAULT_SECS	5

+ 1 - 0
samples/bpf/spintest_user.c

@@ -7,6 +7,7 @@
 #include <sys/resource.h>
 #include "libbpf.h"
 #include "bpf_load.h"
+#include "trace_helpers.h"
 
 int main(int ac, char **argv)
 {

+ 1 - 0
samples/bpf/trace_event_user.c

@@ -21,6 +21,7 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
+#include "trace_helpers.h"
 
 #define SAMPLE_FREQ 50
 

+ 10 - 100
samples/bpf/trace_output_user.c

@@ -21,100 +21,10 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
+#include "trace_helpers.h"
 
 static int pmu_fd;
 
-int page_size;
-int page_cnt = 8;
-volatile struct perf_event_mmap_page *header;
-
-typedef void (*print_fn)(void *data, int size);
-
-static int perf_event_mmap(int fd)
-{
-	void *base;
-	int mmap_size;
-
-	page_size = getpagesize();
-	mmap_size = page_size * (page_cnt + 1);
-
-	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	if (base == MAP_FAILED) {
-		printf("mmap err\n");
-		return -1;
-	}
-
-	header = base;
-	return 0;
-}
-
-static int perf_event_poll(int fd)
-{
-	struct pollfd pfd = { .fd = fd, .events = POLLIN };
-
-	return poll(&pfd, 1, 1000);
-}
-
-struct perf_event_sample {
-	struct perf_event_header header;
-	__u32 size;
-	char data[];
-};
-
-static void perf_event_read(print_fn fn)
-{
-	__u64 data_tail = header->data_tail;
-	__u64 data_head = header->data_head;
-	__u64 buffer_size = page_cnt * page_size;
-	void *base, *begin, *end;
-	char buf[256];
-
-	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
-	if (data_head == data_tail)
-		return;
-
-	base = ((char *)header) + page_size;
-
-	begin = base + data_tail % buffer_size;
-	end = base + data_head % buffer_size;
-
-	while (begin != end) {
-		struct perf_event_sample *e;
-
-		e = begin;
-		if (begin + e->header.size > base + buffer_size) {
-			long len = base + buffer_size - begin;
-
-			assert(len < e->header.size);
-			memcpy(buf, begin, len);
-			memcpy(buf + len, base, e->header.size - len);
-			e = (void *) buf;
-			begin = base + e->header.size - len;
-		} else if (begin + e->header.size == base + buffer_size) {
-			begin = base;
-		} else {
-			begin += e->header.size;
-		}
-
-		if (e->header.type == PERF_RECORD_SAMPLE) {
-			fn(e->data, e->size);
-		} else if (e->header.type == PERF_RECORD_LOST) {
-			struct {
-				struct perf_event_header header;
-				__u64 id;
-				__u64 lost;
-			} *lost = (void *) e;
-			printf("lost %lld events\n", lost->lost);
-		} else {
-			printf("unknown event type=%d size=%d\n",
-			       e->header.type, e->header.size);
-		}
-	}
-
-	__sync_synchronize(); /* smp_mb() */
-	header->data_tail = data_head;
-}
-
 static __u64 time_get_ns(void)
 {
 	struct timespec ts;
@@ -127,7 +37,7 @@ static __u64 start_time;
 
 #define MAX_CNT 100000ll
 
-static void print_bpf_output(void *data, int size)
+static int print_bpf_output(void *data, int size)
 {
 	static __u64 cnt;
 	struct {
@@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size)
 	if (e->cookie != 0x12345678) {
 		printf("BUG pid %llx cookie %llx sized %d\n",
 		       e->pid, e->cookie, size);
-		kill(0, SIGINT);
+		return PERF_EVENT_ERROR;
 	}
 
 	cnt++;
@@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size)
 	if (cnt == MAX_CNT) {
 		printf("recv %lld events per sec\n",
 		       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
-		kill(0, SIGINT);
+		return PERF_EVENT_DONE;
 	}
+
+	return PERF_EVENT_CONT;
 }
 
 static void test_bpf_perf_event(void)
@@ -170,6 +82,7 @@ int main(int argc, char **argv)
 {
 	char filename[256];
 	FILE *f;
+	int ret;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
@@ -187,10 +100,7 @@ int main(int argc, char **argv)
 	(void) f;
 
 	start_time = time_get_ns();
-	for (;;) {
-		perf_event_poll(pmu_fd);
-		perf_event_read(print_bpf_output);
-	}
-
-	return 0;
+	ret = perf_event_poller(pmu_fd, print_bpf_output);
+	kill(0, SIGINT);
+	return ret;
 }

+ 40 - 2
tools/include/uapi/linux/bpf.h

@@ -1767,6 +1767,40 @@ union bpf_attr {
  * 		**CONFIG_XFRM** configuration option.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * 	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *ctx*, which is a pointer
+ *		to the context on which the tracing program is executed.
+ *		To store the stacktrace, the bpf program provides *buf* with
+ *		a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *	::
+ *
+ *		# sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * 	Return
+ * 		a non-negative value equal to or less than size on success, or
+ * 		a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1835,7 +1869,8 @@ union bpf_attr {
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -1869,11 +1904,14 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)

+ 3 - 1
tools/testing/selftests/bpf/Makefile

@@ -32,7 +32,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
 	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
-	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o
+	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
+	test_get_stack_rawtp.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -58,6 +59,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
 
 .PHONY: force
 

+ 2 - 0
tools/testing/selftests/bpf/bpf_helpers.h

@@ -101,6 +101,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
 static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
 				     int size, int flags) =
 	(void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+	(void *) BPF_FUNC_get_stack;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions

+ 102 - 0
tools/testing/selftests/bpf/test_get_stack_rawtp.c

@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+	int pid;
+	int kern_stack_size;
+	int user_stack_size;
+	int user_stack_buildid_size;
+	__u64 kern_stack[MAX_STACK_RAWTP];
+	__u64 user_stack[MAX_STACK_RAWTP];
+	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(__u32),
+	.max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct stack_trace_t),
+	.max_entries = 1,
+};
+
+/* Allocate per-cpu space twice the needed. For the code below
+ *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ *   if (usize < 0)
+ *     return 0;
+ *   ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
+ * verifier will complain that access "raw_data + usize"
+ * with size "max_len - usize" may be out of bound.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
+ *
+ * Doubling the to-be-used max buffer size can fix this verifier
+ * issue and avoid complicated C programming massaging.
+ * This is an acceptable workaround since there is one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+	.max_entries = 1,
+};
+
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+	int max_len, max_buildid_len, usize, ksize, total_size;
+	struct stack_trace_t *data;
+	void *raw_data;
+	__u32 key = 0;
+
+	data = bpf_map_lookup_elem(&stackdata_map, &key);
+	if (!data)
+		return 0;
+
+	max_len = MAX_STACK_RAWTP * sizeof(__u64);
+	max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+	data->pid = bpf_get_current_pid_tgid();
+	data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+					      max_len, 0);
+	data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+					    BPF_F_USER_STACK);
+	data->user_stack_buildid_size = bpf_get_stack(
+		ctx, data->user_stack_buildid, max_buildid_len,
+		BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+	/* write both kernel and user stacks to the same buffer */
+	raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+	if (!raw_data)
+		return 0;
+
+	usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+	if (usize < 0)
+		return 0;
+
+	ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+	if (ksize < 0)
+		return 0;
+
+	total_size = usize + ksize;
+	if (total_size > 0 && total_size <= max_len)
+		bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */

+ 225 - 17
tools/testing/selftests/bpf/test_progs.c

@@ -38,8 +38,10 @@ typedef __u16 __sum16;
 #include "bpf_util.h"
 #include "bpf_endian.h"
 #include "bpf_rlimit.h"
+#include "trace_helpers.h"
 
 static int error_cnt, pass_cnt;
+static bool jit_enabled;
 
 #define MAGIC_BYTES 123
 
@@ -391,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
+static bool is_jit_enabled(void)
+{
+	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+	bool enabled = false;
+	int sysctl_fd;
+
+	sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+	if (sysctl_fd != -1) {
+		char tmpc;
+
+		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+			enabled = (tmpc != '0');
+		close(sysctl_fd);
+	}
+
+	return enabled;
+}
+
 static void test_bpf_obj_id(void)
 {
 	const __u64 array_magic_value = 0xfaceb00c;
 	const __u32 array_key = 0;
 	const int nr_iters = 2;
 	const char *file = "./test_obj_id.o";
-	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
 	const char *expected_prog_name = "test_obj_id";
 	const char *expected_map_name = "test_map_id";
 	const __u64 nsec_per_sec = 1000000000;
@@ -414,20 +433,11 @@ static void test_bpf_obj_id(void)
 	char jited_insns[128], xlated_insns[128], zeros[128];
 	__u32 i, next_id, info_len, nr_id_found, duration = 0;
 	struct timespec real_time_ts, boot_time_ts;
-	int sysctl_fd, jit_enabled = 0, err = 0;
+	int err = 0;
 	__u64 array_value;
 	uid_t my_uid = getuid();
 	time_t now, load_time;
 
-	sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
-	if (sysctl_fd != -1) {
-		char tmpc;
-
-		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
-			jit_enabled = (tmpc != '0');
-		close(sysctl_fd);
-	}
-
 	err = bpf_prog_get_fd_by_id(0);
 	CHECK(err >= 0 || errno != ENOENT,
 	      "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
@@ -896,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd)
 	return 0;
 }
 
+static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
+{
+	__u32 key, next_key, *cur_key_p, *next_key_p;
+	char *val_buf1, *val_buf2;
+	int i, err = 0;
+
+	val_buf1 = malloc(stack_trace_len);
+	val_buf2 = malloc(stack_trace_len);
+	cur_key_p = NULL;
+	next_key_p = &key;
+	while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
+		err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
+		if (err)
+			goto out;
+		err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
+		if (err)
+			goto out;
+		for (i = 0; i < stack_trace_len; i++) {
+			if (val_buf1[i] != val_buf2[i]) {
+				err = -1;
+				goto out;
+			}
+		}
+		key = *next_key_p;
+		cur_key_p = &key;
+		next_key_p = &next_key;
+	}
+	if (errno != ENOENT)
+		err = -1;
+
+out:
+	free(val_buf1);
+	free(val_buf2);
+	return err;
+}
+
 static void test_stacktrace_map()
 {
-	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	const char *file = "./test_stacktrace_map.o";
-	int bytes, efd, err, pmu_fd, prog_fd;
+	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
 	struct perf_event_attr attr = {};
 	__u32 key, val, duration = 0;
 	struct bpf_object *obj;
@@ -956,6 +1002,10 @@ static void test_stacktrace_map()
 	if (stackmap_fd < 0)
 		goto disable_pmu;
 
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (stack_amap_fd < 0)
+		goto disable_pmu;
+
 	/* give some time for bpf program run */
 	sleep(1);
 
@@ -977,6 +1027,12 @@ static void test_stacktrace_map()
 		  "err %d errno %d\n", err, errno))
 		goto disable_pmu_noerr;
 
+	stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu_noerr;
+
 	goto disable_pmu_noerr;
 disable_pmu:
 	error_cnt++;
@@ -1070,9 +1126,9 @@ err:
 
 static void test_stacktrace_build_id(void)
 {
-	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	const char *file = "./test_stacktrace_build_id.o";
-	int bytes, efd, err, pmu_fd, prog_fd;
+	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
 	struct perf_event_attr attr = {};
 	__u32 key, previous_key, val, duration = 0;
 	struct bpf_object *obj;
@@ -1137,6 +1193,11 @@ static void test_stacktrace_build_id(void)
 		  err, errno))
 		goto disable_pmu;
 
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
 	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
 	       == 0);
 	assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
@@ -1188,8 +1249,15 @@ static void test_stacktrace_build_id(void)
 		previous_key = key;
 	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
 
-	CHECK(build_id_matches < 1, "build id match",
-	      "Didn't find expected build ID from the map");
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map"))
+		goto disable_pmu;
+
+	stack_trace_len = PERF_MAX_STACK_DEPTH
+		* sizeof(struct bpf_stack_build_id);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+	      "err %d errno %d\n", err, errno);
 
 disable_pmu:
 	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
@@ -1204,8 +1272,147 @@ out:
 	return;
 }
 
+#define MAX_CNT_RAWTP	10ull
+#define MAX_STACK_RAWTP	100
+struct get_stack_trace_t {
+	int pid;
+	int kern_stack_size;
+	int user_stack_size;
+	int user_stack_buildid_size;
+	__u64 kern_stack[MAX_STACK_RAWTP];
+	__u64 user_stack[MAX_STACK_RAWTP];
+	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+static int get_stack_print_output(void *data, int size)
+{
+	bool good_kern_stack = false, good_user_stack = false;
+	const char *nonjit_func = "___bpf_prog_run";
+	struct get_stack_trace_t *e = data;
+	int i, num_stack;
+	static __u64 cnt;
+	struct ksym *ks;
+
+	cnt++;
+
+	if (size < sizeof(struct get_stack_trace_t)) {
+		__u64 *raw_data = data;
+		bool found = false;
+
+		num_stack = size / sizeof(__u64);
+		/* If jit is enabled, we do not have a good way to
+		 * verify the sanity of the kernel stack. So we
+		 * just assume it is good if the stack is not empty.
+		 * This could be improved in the future.
+		 */
+		if (jit_enabled) {
+			found = num_stack > 0;
+		} else {
+			for (i = 0; i < num_stack; i++) {
+				ks = ksym_search(raw_data[i]);
+				if (strcmp(ks->name, nonjit_func) == 0) {
+					found = true;
+					break;
+				}
+			}
+		}
+		if (found) {
+			good_kern_stack = true;
+			good_user_stack = true;
+		}
+	} else {
+		num_stack = e->kern_stack_size / sizeof(__u64);
+		if (jit_enabled) {
+			good_kern_stack = num_stack > 0;
+		} else {
+			for (i = 0; i < num_stack; i++) {
+				ks = ksym_search(e->kern_stack[i]);
+				if (strcmp(ks->name, nonjit_func) == 0) {
+					good_kern_stack = true;
+					break;
+				}
+			}
+		}
+		if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+			good_user_stack = true;
+	}
+	if (!good_kern_stack || !good_user_stack)
+		return PERF_EVENT_ERROR;
+
+	if (cnt == MAX_CNT_RAWTP)
+		return PERF_EVENT_DONE;
+
+	return PERF_EVENT_CONT;
+}
+
+static void test_get_stack_raw_tp(void)
+{
+	const char *file = "./test_get_stack_rawtp.o";
+	int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
+	struct perf_event_attr attr = {};
+	struct timespec tv = {0, 10};
+	__u32 key = 0, duration = 0;
+	struct bpf_object *obj;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
+	if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  perfmap_fd, errno))
+		goto close_prog;
+
+	err = load_kallsyms();
+	if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
+			 -1/*group_fd*/, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
+	if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+
+	err = perf_event_mmap(pmu_fd);
+	if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	/* trigger some syscall action */
+	for (i = 0; i < MAX_CNT_RAWTP; i++)
+		nanosleep(&tv, NULL);
+
+	err = perf_event_poller(pmu_fd, get_stack_print_output);
+	if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
 int main(void)
 {
+	jit_enabled = is_jit_enabled();
+
 	test_pkt_access();
 	test_xdp();
 	test_xdp_adjust_tail();
@@ -1219,6 +1426,7 @@ int main(void)
 	test_stacktrace_map();
 	test_stacktrace_build_id();
 	test_stacktrace_map_raw_tp();
+	test_get_stack_raw_tp();
 
 	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
 	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;

+ 18 - 2
tools/testing/selftests/bpf/test_stacktrace_build_id.c

@@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
 	.type = BPF_MAP_TYPE_HASH,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u32),
-	.max_entries = 10000,
+	.max_entries = 16384,
 };
 
 struct bpf_map_def SEC("maps") stackmap = {
@@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = {
 	.map_flags = BPF_F_STACK_BUILD_ID,
 };
 
+struct bpf_map_def SEC("maps") stack_amap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct bpf_stack_build_id)
+		* PERF_MAX_STACK_DEPTH,
+	.max_entries = 128,
+};
+
 /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
 struct random_urandom_args {
 	unsigned long long pad;
@@ -42,7 +50,10 @@ struct random_urandom_args {
 SEC("tracepoint/random/urandom_read")
 int oncpu(struct random_urandom_args *args)
 {
+	__u32 max_len = sizeof(struct bpf_stack_build_id)
+			* PERF_MAX_STACK_DEPTH;
 	__u32 key = 0, val = 0, *value_p;
+	void *stack_p;
 
 	value_p = bpf_map_lookup_elem(&control_map, &key);
 	if (value_p && *value_p)
@@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args)
 
 	/* The size of stackmap and stackid_hmap should be the same */
 	key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
-	if ((int)key >= 0)
+	if ((int)key >= 0) {
 		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p)
+			bpf_get_stack(args, stack_p, max_len,
+				      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	}
 
 	return 0;
 }

+ 16 - 3
tools/testing/selftests/bpf/test_stacktrace_map.c

@@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = {
 	.type = BPF_MAP_TYPE_HASH,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u32),
-	.max_entries = 10000,
+	.max_entries = 16384,
 };
 
 struct bpf_map_def SEC("maps") stackmap = {
 	.type = BPF_MAP_TYPE_STACK_TRACE,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
-	.max_entries = 10000,
+	.max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 16384,
 };
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
@@ -44,7 +51,9 @@ struct sched_switch_args {
 SEC("tracepoint/sched/sched_switch")
 int oncpu(struct sched_switch_args *ctx)
 {
+	__u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
 	__u32 key = 0, val = 0, *value_p;
+	void *stack_p;
 
 	value_p = bpf_map_lookup_elem(&control_map, &key);
 	if (value_p && *value_p)
@@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx)
 
 	/* The size of stackmap and stackid_hmap should be the same */
 	key = bpf_get_stackid(ctx, &stackmap, 0);
-	if ((int)key >= 0)
+	if ((int)key >= 0) {
 		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p)
+			bpf_get_stack(ctx, stack_p, max_len, 0);
+	}
 
 	return 0;
 }

+ 45 - 0
tools/testing/selftests/bpf/test_verifier.c

@@ -11680,6 +11680,51 @@ static struct bpf_test tests[] = {
 		.errstr = "BPF_XADD stores into R2 packet",
 		.prog_type = BPF_PROG_TYPE_XDP,
 	},
+	{
+		"bpf_get_stack return R0 within range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
+			BPF_MOV64_IMM(BPF_REG_4, 256),
+			BPF_EMIT_CALL(BPF_FUNC_get_stack),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
+			BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_EMIT_CALL(BPF_FUNC_get_stack),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)

+ 180 - 0
tools/testing/selftests/bpf/trace_helpers.c

@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include "trace_helpers.h"
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+	FILE *f = fopen("/proc/kallsyms", "r");
+	char func[256], buf[256];
+	char symbol;
+	void *addr;
+	int i = 0;
+
+	if (!f)
+		return -ENOENT;
+
+	while (!feof(f)) {
+		if (!fgets(buf, sizeof(buf), f))
+			break;
+		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+			break;
+		if (!addr)
+			continue;
+		syms[i].addr = (long) addr;
+		syms[i].name = strdup(func);
+		i++;
+	}
+	sym_cnt = i;
+	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+	return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+	int start = 0, end = sym_cnt;
+	int result;
+
+	while (start < end) {
+		size_t mid = start + (end - start) / 2;
+
+		result = key - syms[mid].addr;
+		if (result < 0)
+			end = mid;
+		else if (result > 0)
+			start = mid + 1;
+		else
+			return &syms[mid];
+	}
+
+	if (start >= 1 && syms[start - 1].addr < key &&
+	    key < syms[start].addr)
+		/* valid ksym */
+		return &syms[start - 1];
+
+	/* out of range. return _stext */
+	return &syms[0];
+}
+
+static int page_size;
+static int page_cnt = 8;
+static volatile struct perf_event_mmap_page *header;
+
+int perf_event_mmap(int fd)
+{
+	void *base;
+	int mmap_size;
+
+	page_size = getpagesize();
+	mmap_size = page_size * (page_cnt + 1);
+
+	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		printf("mmap err\n");
+		return -1;
+	}
+
+	header = base;
+	return 0;
+}
+
+static int perf_event_poll(int fd)
+{
+	struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+	return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+	struct perf_event_header header;
+	__u32 size;
+	char data[];
+};
+
+static int perf_event_read(perf_event_print_fn fn)
+{
+	__u64 data_tail = header->data_tail;
+	__u64 data_head = header->data_head;
+	__u64 buffer_size = page_cnt * page_size;
+	void *base, *begin, *end;
+	char buf[256];
+	int ret;
+
+	asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+	if (data_head == data_tail)
+		return PERF_EVENT_CONT;
+
+	base = ((char *)header) + page_size;
+
+	begin = base + data_tail % buffer_size;
+	end = base + data_head % buffer_size;
+
+	while (begin != end) {
+		struct perf_event_sample *e;
+
+		e = begin;
+		if (begin + e->header.size > base + buffer_size) {
+			long len = base + buffer_size - begin;
+
+			assert(len < e->header.size);
+			memcpy(buf, begin, len);
+			memcpy(buf + len, base, e->header.size - len);
+			e = (void *) buf;
+			begin = base + e->header.size - len;
+		} else if (begin + e->header.size == base + buffer_size) {
+			begin = base;
+		} else {
+			begin += e->header.size;
+		}
+
+		if (e->header.type == PERF_RECORD_SAMPLE) {
+			ret = fn(e->data, e->size);
+			if (ret != PERF_EVENT_CONT)
+				return ret;
+		} else if (e->header.type == PERF_RECORD_LOST) {
+			struct {
+				struct perf_event_header header;
+				__u64 id;
+				__u64 lost;
+			} *lost = (void *) e;
+			printf("lost %lld events\n", lost->lost);
+		} else {
+			printf("unknown event type=%d size=%d\n",
+			       e->header.type, e->header.size);
+		}
+	}
+
+	__sync_synchronize(); /* smp_mb() */
+	header->data_tail = data_head;
+	return PERF_EVENT_CONT;
+}
+
+int perf_event_poller(int fd, perf_event_print_fn output_fn)
+{
+	int ret;
+
+	for (;;) {
+		perf_event_poll(fd);
+		ret = perf_event_read(output_fn);
+		if (ret != PERF_EVENT_CONT)
+			return ret;
+	}
+
+	return PERF_EVENT_DONE;
+}

+ 23 - 0
tools/testing/selftests/bpf/trace_helpers.h

@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_HELPER_H
+#define __TRACE_HELPER_H
+
+struct ksym {
+	long addr;
+	char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+
+typedef int (*perf_event_print_fn)(void *data, int size);
+
+/* return code for perf_event_print_fn */
+#define PERF_EVENT_DONE		0
+#define PERF_EVENT_ERROR	-1
+#define PERF_EVENT_CONT		-2
+
+int perf_event_mmap(int fd);
+/* return PERF_EVENT_DONE or PERF_EVENT_ERROR */
+int perf_event_poller(int fd, perf_event_print_fn output_fn);
+#endif
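
A minimal consumer sketch for this API (not part of the patch;
my_handler and consume are hypothetical, fd is a perf event fd set up
for PERF_COUNT_SW_BPF_OUTPUT and already wired into the program's
perf event array map):

	#include "trace_helpers.h"

	static int my_handler(void *data, int size)
	{
		/* consume one PERF_RECORD_SAMPLE payload */
		return PERF_EVENT_CONT; /* or PERF_EVENT_DONE / PERF_EVENT_ERROR */
	}

	static int consume(int fd)
	{
		if (perf_event_mmap(fd) < 0)
			return PERF_EVENT_ERROR;
		/* blocks, invoking my_handler once per sample, until the
		 * handler returns PERF_EVENT_DONE or PERF_EVENT_ERROR
		 */
		return perf_event_poller(fd, my_handler);
	}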