|
@@ -49,8 +49,6 @@
|
|
|
|
|
|
#include <asm/irq_regs.h>
|
|
|
|
|
|
-static struct workqueue_struct *perf_wq;
|
|
|
-
|
|
|
typedef int (*remote_function_f)(void *);
|
|
|
|
|
|
struct remote_function_call {
|
|
@@ -126,44 +124,181 @@ static int cpu_function_call(int cpu, remote_function_f func, void *info)
|
|
|
return data.ret;
|
|
|
}
|
|
|
|
|
|
-static void event_function_call(struct perf_event *event,
|
|
|
- int (*active)(void *),
|
|
|
- void (*inactive)(void *),
|
|
|
- void *data)
|
|
|
+static inline struct perf_cpu_context *
|
|
|
+__get_cpu_context(struct perf_event_context *ctx)
|
|
|
+{
|
|
|
+ return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
|
|
|
+}
|
|
|
+
|
|
|
+static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx)
|
|
|
{
|
|
|
+ raw_spin_lock(&cpuctx->ctx.lock);
|
|
|
+ if (ctx)
|
|
|
+ raw_spin_lock(&ctx->lock);
|
|
|
+}
|
|
|
+
|
|
|
+static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx)
|
|
|
+{
|
|
|
+ if (ctx)
|
|
|
+ raw_spin_unlock(&ctx->lock);
|
|
|
+ raw_spin_unlock(&cpuctx->ctx.lock);
|
|
|
+}
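A minimal usage sketch of this lock pair (not part of the patch; the body is a placeholder). The CPU context lock is always taken first; the task context lock only when a task context exists:

        perf_ctx_lock(cpuctx, cpuctx->task_ctx);
        /* operate on cpuctx->ctx and, if set, cpuctx->task_ctx */
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);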
|
|
|
+
|
|
|
+#define TASK_TOMBSTONE ((void *)-1L)
|
|
|
+
|
|
|
+static bool is_kernel_event(struct perf_event *event)
|
|
|
+{
|
|
|
+ return READ_ONCE(event->owner) == TASK_TOMBSTONE;
|
|
|
+}
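TASK_TOMBSTONE gets two related uses later in this patch; both assignments are quoted here, verbatim from the hunks below, for orientation:

        event->owner = TASK_TOMBSTONE;                /* event owned by the kernel, not by a task */
        WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);  /* task context whose task has exited */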
|
|
|
+
|
|
|
+/*
|
|
|
+ * On task ctx scheduling...
|
|
|
+ *
|
|
|
+ * When !ctx->nr_events a task context will not be scheduled. This means
|
|
|
+ * we can disable the scheduler hooks (for performance) without leaving
|
|
|
+ * pending task ctx state.
|
|
|
+ *
|
|
|
+ * This however results in two special cases:
|
|
|
+ *
|
|
|
+ * - removing the last event from a task ctx; this is relatively
|
|
|
+ * straightforward and is done in __perf_remove_from_context().
|
|
|
+ *
|
|
|
+ * - adding the first event to a task ctx; this is tricky because we cannot
|
|
|
+ * rely on ctx->is_active and therefore cannot use event_function_call().
|
|
|
+ * See perf_install_in_context().
|
|
|
+ *
|
|
|
+ * This is because we need a ctx->lock serialized variable (ctx->is_active)
|
|
|
+ * to reliably determine if a particular task/context is scheduled in. The
|
|
|
+ * task_curr() use in task_function_call() is racy in that a remote context
|
|
|
+ * switch is not a single atomic operation.
|
|
|
+ *
|
|
|
+ * As is, the situation is 'safe' because we set rq->curr before we do the
|
|
|
+ * actual context switch. This means that task_curr() will fail early, but
|
|
|
+ * we'll continue spinning on ctx->is_active until we've passed
|
|
|
+ * perf_event_task_sched_out().
|
|
|
+ *
|
|
|
+ * Without this ctx->lock serialized variable we could have a race where we
|
|
|
+ * find the task (and hence the context) inactive while in fact they are active.
|
|
|
+ *
|
|
|
+ * If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
|
|
|
+ */
|
|
|
+
|
|
|
+typedef void (*event_f)(struct perf_event *, struct perf_cpu_context *,
|
|
|
+ struct perf_event_context *, void *);
|
|
|
+
|
|
|
+struct event_function_struct {
|
|
|
+ struct perf_event *event;
|
|
|
+ event_f func;
|
|
|
+ void *data;
|
|
|
+};
|
|
|
+
|
|
|
+static int event_function(void *info)
|
|
|
+{
|
|
|
+ struct event_function_struct *efs = info;
|
|
|
+ struct perf_event *event = efs->event;
|
|
|
struct perf_event_context *ctx = event->ctx;
|
|
|
- struct task_struct *task = ctx->task;
|
|
|
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
+ struct perf_event_context *task_ctx = cpuctx->task_ctx;
|
|
|
+ int ret = 0;
|
|
|
+
|
|
|
+ WARN_ON_ONCE(!irqs_disabled());
|
|
|
+
|
|
|
+ perf_ctx_lock(cpuctx, task_ctx);
|
|
|
+ /*
|
|
|
+ * Since we do the IPI call without holding ctx->lock things can have
|
|
|
+ * changed, double check we hit the task we set out to hit.
|
|
|
+ */
|
|
|
+ if (ctx->task) {
|
|
|
+ if (ctx->task != current) {
|
|
|
+ ret = -EAGAIN;
|
|
|
+ goto unlock;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We only use event_function_call() on established contexts,
|
|
|
+ * and event_function() is only ever called when active (or
|
|
|
+ * rather, we'll have bailed in task_function_call() or the
|
|
|
+ * above ctx->task != current test), therefore we must have
|
|
|
+ * ctx->is_active here.
|
|
|
+ */
|
|
|
+ WARN_ON_ONCE(!ctx->is_active);
|
|
|
+ /*
|
|
|
+ * And since we have ctx->is_active, cpuctx->task_ctx must
|
|
|
+ * match.
|
|
|
+ */
|
|
|
+ WARN_ON_ONCE(task_ctx != ctx);
|
|
|
+ } else {
|
|
|
+ WARN_ON_ONCE(&cpuctx->ctx != ctx);
|
|
|
+ }
|
|
|
+
|
|
|
+ efs->func(event, cpuctx, ctx, efs->data);
|
|
|
+unlock:
|
|
|
+ perf_ctx_unlock(cpuctx, task_ctx);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
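For illustration, a hypothetical callback matching the event_f signature (the name and body are made up, not part of the patch); event_function() invokes it with the relevant ctx->lock(s) held and IRQs disabled:

        static void __example_event_nop(struct perf_event *event,
                                        struct perf_cpu_context *cpuctx,
                                        struct perf_event_context *ctx,
                                        void *info)
        {
                lockdep_assert_held(&ctx->lock);
                /* inspect or reprogram the event; no sleeping here */
        }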
|
|
|
+
|
|
|
+static void event_function_local(struct perf_event *event, event_f func, void *data)
|
|
|
+{
|
|
|
+ struct event_function_struct efs = {
|
|
|
+ .event = event,
|
|
|
+ .func = func,
|
|
|
+ .data = data,
|
|
|
+ };
|
|
|
+
|
|
|
+ int ret = event_function(&efs);
|
|
|
+ WARN_ON_ONCE(ret);
|
|
|
+}
|
|
|
+
|
|
|
+static void event_function_call(struct perf_event *event, event_f func, void *data)
|
|
|
+{
|
|
|
+ struct perf_event_context *ctx = event->ctx;
|
|
|
+ struct task_struct *task = READ_ONCE(ctx->task); /* verified in event_function */
|
|
|
+ struct event_function_struct efs = {
|
|
|
+ .event = event,
|
|
|
+ .func = func,
|
|
|
+ .data = data,
|
|
|
+ };
|
|
|
+
|
|
|
+ if (!event->parent) {
|
|
|
+ /*
|
|
|
+ * If this is a !child event, we must hold ctx::mutex to
|
|
|
+ * stabilize the event->ctx relation. See
|
|
|
+ * perf_event_ctx_lock().
|
|
|
+ */
|
|
|
+ lockdep_assert_held(&ctx->mutex);
|
|
|
+ }
|
|
|
|
|
|
if (!task) {
|
|
|
- cpu_function_call(event->cpu, active, data);
|
|
|
+ cpu_function_call(event->cpu, event_function, &efs);
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
again:
|
|
|
- if (!task_function_call(task, active, data))
|
|
|
+ if (task == TASK_TOMBSTONE)
|
|
|
+ return;
|
|
|
+
|
|
|
+ if (!task_function_call(task, event_function, &efs))
|
|
|
return;
|
|
|
|
|
|
raw_spin_lock_irq(&ctx->lock);
|
|
|
- if (ctx->is_active) {
|
|
|
- /*
|
|
|
- * Reload the task pointer, it might have been changed by
|
|
|
- * a concurrent perf_event_context_sched_out().
|
|
|
- */
|
|
|
- task = ctx->task;
|
|
|
- raw_spin_unlock_irq(&ctx->lock);
|
|
|
- goto again;
|
|
|
+ /*
|
|
|
+ * Reload the task pointer, it might have been changed by
|
|
|
+ * a concurrent perf_event_context_sched_out().
|
|
|
+ */
|
|
|
+ task = ctx->task;
|
|
|
+ if (task != TASK_TOMBSTONE) {
|
|
|
+ if (ctx->is_active) {
|
|
|
+ raw_spin_unlock_irq(&ctx->lock);
|
|
|
+ goto again;
|
|
|
+ }
|
|
|
+ func(event, NULL, ctx, data);
|
|
|
}
|
|
|
- inactive(data);
|
|
|
raw_spin_unlock_irq(&ctx->lock);
|
|
|
}
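Typical invocations, as added further down in this patch; a single callback plus opaque data replaces the old active/inactive pair:

        event_function_call(event, __perf_event_disable, NULL);
        event_function_call(event, __perf_event_period, &value);
        event_function_call(event, __perf_remove_from_context, (void *)flags);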
|
|
|
|
|
|
-#define EVENT_OWNER_KERNEL ((void *) -1)
|
|
|
-
|
|
|
-static bool is_kernel_event(struct perf_event *event)
|
|
|
-{
|
|
|
- return event->owner == EVENT_OWNER_KERNEL;
|
|
|
-}
|
|
|
-
|
|
|
#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
|
|
|
PERF_FLAG_FD_OUTPUT |\
|
|
|
PERF_FLAG_PID_CGROUP |\
|
|
@@ -368,28 +503,6 @@ static inline u64 perf_event_clock(struct perf_event *event)
|
|
|
return event->clock();
|
|
|
}
|
|
|
|
|
|
-static inline struct perf_cpu_context *
|
|
|
-__get_cpu_context(struct perf_event_context *ctx)
|
|
|
-{
|
|
|
- return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
|
|
|
-}
|
|
|
-
|
|
|
-static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
|
|
|
- struct perf_event_context *ctx)
|
|
|
-{
|
|
|
- raw_spin_lock(&cpuctx->ctx.lock);
|
|
|
- if (ctx)
|
|
|
- raw_spin_lock(&ctx->lock);
|
|
|
-}
|
|
|
-
|
|
|
-static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
|
|
|
- struct perf_event_context *ctx)
|
|
|
-{
|
|
|
- if (ctx)
|
|
|
- raw_spin_unlock(&ctx->lock);
|
|
|
- raw_spin_unlock(&cpuctx->ctx.lock);
|
|
|
-}
|
|
|
-
|
|
|
#ifdef CONFIG_CGROUP_PERF
|
|
|
|
|
|
static inline bool
|
|
@@ -579,13 +692,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
|
|
|
* we are holding the rcu lock
|
|
|
*/
|
|
|
cgrp1 = perf_cgroup_from_task(task, NULL);
|
|
|
-
|
|
|
- /*
|
|
|
- * next is NULL when called from perf_event_enable_on_exec()
|
|
|
- * that will systematically cause a cgroup_switch()
|
|
|
- */
|
|
|
- if (next)
|
|
|
- cgrp2 = perf_cgroup_from_task(next, NULL);
|
|
|
+ cgrp2 = perf_cgroup_from_task(next, NULL);
|
|
|
|
|
|
/*
|
|
|
* only schedule out current cgroup events if we know
|
|
@@ -611,8 +718,6 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
|
|
|
* we are holding the rcu lock
|
|
|
*/
|
|
|
cgrp1 = perf_cgroup_from_task(task, NULL);
|
|
|
-
|
|
|
- /* prev can never be NULL */
|
|
|
cgrp2 = perf_cgroup_from_task(prev, NULL);
|
|
|
|
|
|
/*
|
|
@@ -917,7 +1022,7 @@ static void put_ctx(struct perf_event_context *ctx)
|
|
|
if (atomic_dec_and_test(&ctx->refcount)) {
|
|
|
if (ctx->parent_ctx)
|
|
|
put_ctx(ctx->parent_ctx);
|
|
|
- if (ctx->task)
|
|
|
+ if (ctx->task && ctx->task != TASK_TOMBSTONE)
|
|
|
put_task_struct(ctx->task);
|
|
|
call_rcu(&ctx->rcu_head, free_ctx);
|
|
|
}
|
|
@@ -934,9 +1039,8 @@ static void put_ctx(struct perf_event_context *ctx)
|
|
|
* perf_event_context::mutex nests and those are:
|
|
|
*
|
|
|
* - perf_event_exit_task_context() [ child , 0 ]
|
|
|
- * __perf_event_exit_task()
|
|
|
- * sync_child_event()
|
|
|
- * put_event() [ parent, 1 ]
|
|
|
+ * perf_event_exit_event()
|
|
|
+ * put_event() [ parent, 1 ]
|
|
|
*
|
|
|
* - perf_event_init_context() [ parent, 0 ]
|
|
|
* inherit_task_group()
|
|
@@ -979,8 +1083,8 @@ static void put_ctx(struct perf_event_context *ctx)
|
|
|
* Lock order:
|
|
|
* task_struct::perf_event_mutex
|
|
|
* perf_event_context::mutex
|
|
|
- * perf_event_context::lock
|
|
|
* perf_event::child_mutex;
|
|
|
+ * perf_event_context::lock
|
|
|
* perf_event::mmap_mutex
|
|
|
* mmap_sem
|
|
|
*/
|
|
@@ -1078,6 +1182,7 @@ static u64 primary_event_id(struct perf_event *event)
|
|
|
|
|
|
/*
|
|
|
* Get the perf_event_context for a task and lock it.
|
|
|
+ *
|
|
|
* This has to cope with with the fact that until it is locked,
|
|
|
* the context could get moved to another task.
|
|
|
*/
|
|
@@ -1118,9 +1223,12 @@ retry:
|
|
|
goto retry;
|
|
|
}
|
|
|
|
|
|
- if (!atomic_inc_not_zero(&ctx->refcount)) {
|
|
|
+ if (ctx->task == TASK_TOMBSTONE ||
|
|
|
+ !atomic_inc_not_zero(&ctx->refcount)) {
|
|
|
raw_spin_unlock(&ctx->lock);
|
|
|
ctx = NULL;
|
|
|
+ } else {
|
|
|
+ WARN_ON_ONCE(ctx->task != task);
|
|
|
}
|
|
|
}
|
|
|
rcu_read_unlock();
|
|
@@ -1246,6 +1354,8 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
|
|
|
static void
|
|
|
list_add_event(struct perf_event *event, struct perf_event_context *ctx)
|
|
|
{
|
|
|
+ lockdep_assert_held(&ctx->lock);
|
|
|
+
|
|
|
WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
|
|
|
event->attach_state |= PERF_ATTACH_CONTEXT;
|
|
|
|
|
@@ -1448,11 +1558,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
|
|
|
|
|
|
if (is_cgroup_event(event)) {
|
|
|
ctx->nr_cgroups--;
|
|
|
+ /*
|
|
|
+ * Because cgroup events are always per-cpu events, this will
|
|
|
+ * always be called from the right CPU.
|
|
|
+ */
|
|
|
cpuctx = __get_cpu_context(ctx);
|
|
|
/*
|
|
|
- * if there are no more cgroup events
|
|
|
- * then cler cgrp to avoid stale pointer
|
|
|
- * in update_cgrp_time_from_cpuctx()
|
|
|
+ * If there are no more cgroup events then clear cgrp to avoid
|
|
|
+ * stale pointer in update_cgrp_time_from_cpuctx().
|
|
|
*/
|
|
|
if (!ctx->nr_cgroups)
|
|
|
cpuctx->cgrp = NULL;
|
|
@@ -1530,45 +1643,11 @@ out:
|
|
|
perf_event__header_size(tmp);
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * User event without the task.
|
|
|
- */
|
|
|
static bool is_orphaned_event(struct perf_event *event)
|
|
|
{
|
|
|
- return event && !is_kernel_event(event) && !event->owner;
|
|
|
+ return event->state == PERF_EVENT_STATE_EXIT;
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Event has a parent but parent's task finished and it's
|
|
|
- * alive only because of children holding refference.
|
|
|
- */
|
|
|
-static bool is_orphaned_child(struct perf_event *event)
|
|
|
-{
|
|
|
- return is_orphaned_event(event->parent);
|
|
|
-}
|
|
|
-
|
|
|
-static void orphans_remove_work(struct work_struct *work);
|
|
|
-
|
|
|
-static void schedule_orphans_remove(struct perf_event_context *ctx)
|
|
|
-{
|
|
|
- if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
|
|
|
- return;
|
|
|
-
|
|
|
- if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
|
|
|
- get_ctx(ctx);
|
|
|
- ctx->orphans_remove_sched = true;
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-static int __init perf_workqueue_init(void)
|
|
|
-{
|
|
|
- perf_wq = create_singlethread_workqueue("perf");
|
|
|
- WARN(!perf_wq, "failed to create perf workqueue\n");
|
|
|
- return perf_wq ? 0 : -1;
|
|
|
-}
|
|
|
-
|
|
|
-core_initcall(perf_workqueue_init);
|
|
|
-
|
|
|
static inline int pmu_filter_match(struct perf_event *event)
|
|
|
{
|
|
|
struct pmu *pmu = event->pmu;
|
|
@@ -1629,9 +1708,6 @@ event_sched_out(struct perf_event *event,
|
|
|
if (event->attr.exclusive || !cpuctx->active_oncpu)
|
|
|
cpuctx->exclusive = 0;
|
|
|
|
|
|
- if (is_orphaned_child(event))
|
|
|
- schedule_orphans_remove(ctx);
|
|
|
-
|
|
|
perf_pmu_enable(event->pmu);
|
|
|
}
|
|
|
|
|
@@ -1655,21 +1731,8 @@ group_sched_out(struct perf_event *group_event,
|
|
|
cpuctx->exclusive = 0;
|
|
|
}
|
|
|
|
|
|
-struct remove_event {
|
|
|
- struct perf_event *event;
|
|
|
- bool detach_group;
|
|
|
-};
|
|
|
-
|
|
|
-static void ___perf_remove_from_context(void *info)
|
|
|
-{
|
|
|
- struct remove_event *re = info;
|
|
|
- struct perf_event *event = re->event;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
-
|
|
|
- if (re->detach_group)
|
|
|
- perf_group_detach(event);
|
|
|
- list_del_event(event, ctx);
|
|
|
-}
|
|
|
+#define DETACH_GROUP 0x01UL
|
|
|
+#define DETACH_STATE 0x02UL
|
|
|
|
|
|
/*
|
|
|
* Cross CPU call to remove a performance event
|
|
@@ -1677,33 +1740,33 @@ static void ___perf_remove_from_context(void *info)
|
|
|
* We disable the event on the hardware level first. After that we
|
|
|
* remove it from the context list.
|
|
|
*/
|
|
|
-static int __perf_remove_from_context(void *info)
|
|
|
+static void
|
|
|
+__perf_remove_from_context(struct perf_event *event,
|
|
|
+ struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx,
|
|
|
+ void *info)
|
|
|
{
|
|
|
- struct remove_event *re = info;
|
|
|
- struct perf_event *event = re->event;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
+ unsigned long flags = (unsigned long)info;
|
|
|
|
|
|
- raw_spin_lock(&ctx->lock);
|
|
|
event_sched_out(event, cpuctx, ctx);
|
|
|
- if (re->detach_group)
|
|
|
+ if (flags & DETACH_GROUP)
|
|
|
perf_group_detach(event);
|
|
|
list_del_event(event, ctx);
|
|
|
- if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
|
|
|
+ if (flags & DETACH_STATE)
|
|
|
+ event->state = PERF_EVENT_STATE_EXIT;
|
|
|
+
|
|
|
+ if (!ctx->nr_events && ctx->is_active) {
|
|
|
ctx->is_active = 0;
|
|
|
- cpuctx->task_ctx = NULL;
|
|
|
+ if (ctx->task) {
|
|
|
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
|
|
|
+ cpuctx->task_ctx = NULL;
|
|
|
+ }
|
|
|
}
|
|
|
- raw_spin_unlock(&ctx->lock);
|
|
|
-
|
|
|
- return 0;
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
* Remove the event from a task's (or a CPU's) list of events.
|
|
|
*
|
|
|
- * CPU events are removed with a smp call. For task events we only
|
|
|
- * call when the task is on a CPU.
|
|
|
- *
|
|
|
* If event->ctx is a cloned context, callers must make sure that
|
|
|
* every task struct that event->ctx->task could possibly point to
|
|
|
* remains valid. This is OK when called from perf_release since
|
|
@@ -1711,73 +1774,32 @@ static int __perf_remove_from_context(void *info)
|
|
|
* When called from perf_event_exit_task, it's OK because the
|
|
|
* context has been detached from its task.
|
|
|
*/
|
|
|
-static void perf_remove_from_context(struct perf_event *event, bool detach_group)
|
|
|
+static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
|
|
|
{
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
- struct remove_event re = {
|
|
|
- .event = event,
|
|
|
- .detach_group = detach_group,
|
|
|
- };
|
|
|
+ lockdep_assert_held(&event->ctx->mutex);
|
|
|
|
|
|
- lockdep_assert_held(&ctx->mutex);
|
|
|
-
|
|
|
- event_function_call(event, __perf_remove_from_context,
|
|
|
- ___perf_remove_from_context, &re);
|
|
|
+ event_function_call(event, __perf_remove_from_context, (void *)flags);
|
|
|
}
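Example call with the new flag encoding, taken from perf_event_release_kernel() below; the flags travel as an opaque pointer and are decoded again in __perf_remove_from_context():

        perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);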
|
|
|
|
|
|
/*
|
|
|
* Cross CPU call to disable a performance event
|
|
|
*/
|
|
|
-int __perf_event_disable(void *info)
|
|
|
-{
|
|
|
- struct perf_event *event = info;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
-
|
|
|
- /*
|
|
|
- * If this is a per-task event, need to check whether this
|
|
|
- * event's task is the current task on this cpu.
|
|
|
- *
|
|
|
- * Can trigger due to concurrent perf_event_context_sched_out()
|
|
|
- * flipping contexts around.
|
|
|
- */
|
|
|
- if (ctx->task && cpuctx->task_ctx != ctx)
|
|
|
- return -EINVAL;
|
|
|
-
|
|
|
- raw_spin_lock(&ctx->lock);
|
|
|
-
|
|
|
- /*
|
|
|
- * If the event is on, turn it off.
|
|
|
- * If it is in error state, leave it in error state.
|
|
|
- */
|
|
|
- if (event->state >= PERF_EVENT_STATE_INACTIVE) {
|
|
|
- update_context_time(ctx);
|
|
|
- update_cgrp_time_from_event(event);
|
|
|
- update_group_times(event);
|
|
|
- if (event == event->group_leader)
|
|
|
- group_sched_out(event, cpuctx, ctx);
|
|
|
- else
|
|
|
- event_sched_out(event, cpuctx, ctx);
|
|
|
- event->state = PERF_EVENT_STATE_OFF;
|
|
|
- }
|
|
|
-
|
|
|
- raw_spin_unlock(&ctx->lock);
|
|
|
-
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
-void ___perf_event_disable(void *info)
|
|
|
+static void __perf_event_disable(struct perf_event *event,
|
|
|
+ struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx,
|
|
|
+ void *info)
|
|
|
{
|
|
|
- struct perf_event *event = info;
|
|
|
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
|
|
|
+ return;
|
|
|
|
|
|
- /*
|
|
|
- * Since we have the lock this context can't be scheduled
|
|
|
- * in, so we can change the state safely.
|
|
|
- */
|
|
|
- if (event->state == PERF_EVENT_STATE_INACTIVE) {
|
|
|
- update_group_times(event);
|
|
|
- event->state = PERF_EVENT_STATE_OFF;
|
|
|
- }
|
|
|
+ update_context_time(ctx);
|
|
|
+ update_cgrp_time_from_event(event);
|
|
|
+ update_group_times(event);
|
|
|
+ if (event == event->group_leader)
|
|
|
+ group_sched_out(event, cpuctx, ctx);
|
|
|
+ else
|
|
|
+ event_sched_out(event, cpuctx, ctx);
|
|
|
+ event->state = PERF_EVENT_STATE_OFF;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1788,7 +1810,8 @@ void ___perf_event_disable(void *info)
|
|
|
* remains valid. This condition is satisifed when called through
|
|
|
* perf_event_for_each_child or perf_event_for_each because they
|
|
|
* hold the top-level event's child_mutex, so any descendant that
|
|
|
- * goes to exit will block in sync_child_event.
|
|
|
+ * goes to exit will block in perf_event_exit_event().
|
|
|
+ *
|
|
|
* When called from perf_pending_event it's OK because event->ctx
|
|
|
* is the current context on this CPU and preemption is disabled,
|
|
|
* hence we can't get into perf_event_task_sched_out for this context.
|
|
@@ -1804,8 +1827,12 @@ static void _perf_event_disable(struct perf_event *event)
|
|
|
}
|
|
|
raw_spin_unlock_irq(&ctx->lock);
|
|
|
|
|
|
- event_function_call(event, __perf_event_disable,
|
|
|
- ___perf_event_disable, event);
|
|
|
+ event_function_call(event, __perf_event_disable, NULL);
|
|
|
+}
|
|
|
+
|
|
|
+void perf_event_disable_local(struct perf_event *event)
|
|
|
+{
|
|
|
+ event_function_local(event, __perf_event_disable, NULL);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1918,9 +1945,6 @@ event_sched_in(struct perf_event *event,
|
|
|
if (event->attr.exclusive)
|
|
|
cpuctx->exclusive = 1;
|
|
|
|
|
|
- if (is_orphaned_child(event))
|
|
|
- schedule_orphans_remove(ctx);
|
|
|
-
|
|
|
out:
|
|
|
perf_pmu_enable(event->pmu);
|
|
|
|
|
@@ -2039,7 +2063,8 @@ static void add_event_to_ctx(struct perf_event *event,
|
|
|
event->tstamp_stopped = tstamp;
|
|
|
}
|
|
|
|
|
|
-static void task_ctx_sched_out(struct perf_event_context *ctx);
|
|
|
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx);
|
|
|
static void
|
|
|
ctx_sched_in(struct perf_event_context *ctx,
|
|
|
struct perf_cpu_context *cpuctx,
|
|
@@ -2058,16 +2083,15 @@ static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
|
|
|
ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
|
|
|
}
|
|
|
|
|
|
-static void ___perf_install_in_context(void *info)
|
|
|
+static void ctx_resched(struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *task_ctx)
|
|
|
{
|
|
|
- struct perf_event *event = info;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
-
|
|
|
- /*
|
|
|
- * Since the task isn't running, its safe to add the event, us holding
|
|
|
- * the ctx->lock ensures the task won't get scheduled in.
|
|
|
- */
|
|
|
- add_event_to_ctx(event, ctx);
|
|
|
+ perf_pmu_disable(cpuctx->ctx.pmu);
|
|
|
+ if (task_ctx)
|
|
|
+ task_ctx_sched_out(cpuctx, task_ctx);
|
|
|
+ cpu_ctx_sched_out(cpuctx, EVENT_ALL);
|
|
|
+ perf_event_sched_in(cpuctx, task_ctx, current);
|
|
|
+ perf_pmu_enable(cpuctx->ctx.pmu);
|
|
|
}
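ctx_resched() is the new common "schedule everything out, then back in" helper, so pinned/flexible priorities are re-evaluated after a change; the callers added in this patch are:

        ctx_resched(cpuctx, task_ctx);   /* __perf_install_in_context(), __perf_event_enable() */
        ctx_resched(cpuctx, ctx);        /* perf_event_enable_on_exec() */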
|
|
|
|
|
|
/*
|
|
@@ -2077,55 +2101,31 @@ static void ___perf_install_in_context(void *info)
|
|
|
*/
|
|
|
static int __perf_install_in_context(void *info)
|
|
|
{
|
|
|
- struct perf_event *event = info;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
+ struct perf_event_context *ctx = info;
|
|
|
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
struct perf_event_context *task_ctx = cpuctx->task_ctx;
|
|
|
- struct task_struct *task = current;
|
|
|
-
|
|
|
- perf_ctx_lock(cpuctx, task_ctx);
|
|
|
- perf_pmu_disable(cpuctx->ctx.pmu);
|
|
|
|
|
|
- /*
|
|
|
- * If there was an active task_ctx schedule it out.
|
|
|
- */
|
|
|
- if (task_ctx)
|
|
|
- task_ctx_sched_out(task_ctx);
|
|
|
-
|
|
|
- /*
|
|
|
- * If the context we're installing events in is not the
|
|
|
- * active task_ctx, flip them.
|
|
|
- */
|
|
|
- if (ctx->task && task_ctx != ctx) {
|
|
|
- if (task_ctx)
|
|
|
- raw_spin_unlock(&task_ctx->lock);
|
|
|
+ raw_spin_lock(&cpuctx->ctx.lock);
|
|
|
+ if (ctx->task) {
|
|
|
raw_spin_lock(&ctx->lock);
|
|
|
+ /*
|
|
|
+ * If we hit the 'wrong' task, we've since scheduled and
|
|
|
+ * everything should be sorted, nothing to do!
|
|
|
+ */
|
|
|
task_ctx = ctx;
|
|
|
- }
|
|
|
+ if (ctx->task != current)
|
|
|
+ goto unlock;
|
|
|
|
|
|
- if (task_ctx) {
|
|
|
- cpuctx->task_ctx = task_ctx;
|
|
|
- task = task_ctx->task;
|
|
|
+ /*
|
|
|
+ * If task_ctx is set, it had better be to us.
|
|
|
+ */
|
|
|
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
|
|
|
+ } else if (task_ctx) {
|
|
|
+ raw_spin_lock(&task_ctx->lock);
|
|
|
}
|
|
|
|
|
|
- cpu_ctx_sched_out(cpuctx, EVENT_ALL);
|
|
|
-
|
|
|
- update_context_time(ctx);
|
|
|
- /*
|
|
|
- * update cgrp time only if current cgrp
|
|
|
- * matches event->cgrp. Must be done before
|
|
|
- * calling add_event_to_ctx()
|
|
|
- */
|
|
|
- update_cgrp_time_from_event(event);
|
|
|
-
|
|
|
- add_event_to_ctx(event, ctx);
|
|
|
-
|
|
|
- /*
|
|
|
- * Schedule everything back in
|
|
|
- */
|
|
|
- perf_event_sched_in(cpuctx, task_ctx, task);
|
|
|
-
|
|
|
- perf_pmu_enable(cpuctx->ctx.pmu);
|
|
|
+ ctx_resched(cpuctx, task_ctx);
|
|
|
+unlock:
|
|
|
perf_ctx_unlock(cpuctx, task_ctx);
|
|
|
|
|
|
return 0;
|
|
@@ -2133,27 +2133,54 @@ static int __perf_install_in_context(void *info)
|
|
|
|
|
|
/*
|
|
|
* Attach a performance event to a context
|
|
|
- *
|
|
|
- * First we add the event to the list with the hardware enable bit
|
|
|
- * in event->hw_config cleared.
|
|
|
- *
|
|
|
- * If the event is attached to a task which is on a CPU we use a smp
|
|
|
- * call to enable it in the task context. The task might have been
|
|
|
- * scheduled away, but we check this in the smp call again.
|
|
|
*/
|
|
|
static void
|
|
|
perf_install_in_context(struct perf_event_context *ctx,
|
|
|
struct perf_event *event,
|
|
|
int cpu)
|
|
|
{
|
|
|
+ struct task_struct *task = NULL;
|
|
|
+
|
|
|
lockdep_assert_held(&ctx->mutex);
|
|
|
|
|
|
event->ctx = ctx;
|
|
|
if (event->cpu != -1)
|
|
|
event->cpu = cpu;
|
|
|
|
|
|
- event_function_call(event, __perf_install_in_context,
|
|
|
- ___perf_install_in_context, event);
|
|
|
+ /*
|
|
|
+ * Installing events is tricky because we cannot rely on ctx->is_active
|
|
|
+ * to be set in case this is the nr_events 0 -> 1 transition.
|
|
|
+ *
|
|
|
+ * So what we do is we add the event to the list here, which will allow
|
|
|
+ * a future context switch to DTRT and then send a racy IPI. If the IPI
|
|
|
+ * fails to hit the right task, this means a context switch must have
|
|
|
+ * happened and that will have taken care of business.
|
|
|
+ */
|
|
|
+ raw_spin_lock_irq(&ctx->lock);
|
|
|
+ task = ctx->task;
|
|
|
+ /*
|
|
|
+ * Worse, we cannot even rely on the ctx actually existing anymore. If
|
|
|
+ * between find_get_context() and perf_install_in_context() the task
|
|
|
+ * went through perf_event_exit_task(), it's dead and we should not be
|
|
|
+ * adding new events.
|
|
|
+ */
|
|
|
+ if (task == TASK_TOMBSTONE) {
|
|
|
+ raw_spin_unlock_irq(&ctx->lock);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ update_context_time(ctx);
|
|
|
+ /*
|
|
|
+ * Update cgrp time only if current cgrp matches event->cgrp.
|
|
|
+ * Must be done before calling add_event_to_ctx().
|
|
|
+ */
|
|
|
+ update_cgrp_time_from_event(event);
|
|
|
+ add_event_to_ctx(event, ctx);
|
|
|
+ raw_spin_unlock_irq(&ctx->lock);
|
|
|
+
|
|
|
+ if (task)
|
|
|
+ task_function_call(task, __perf_install_in_context, ctx);
|
|
|
+ else
|
|
|
+ cpu_function_call(cpu, __perf_install_in_context, ctx);
|
|
|
}
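For reference, the call sequence on the syscall side (both lines appear verbatim later in this file; shown here only to connect the pieces):

        /* sys_perf_event_open() and perf_event_create_kernel_counter(): */
        perf_install_in_context(ctx, event, event->cpu);
        perf_unpin_context(ctx);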
|
|
|
|
|
|
/*
|
|
@@ -2180,43 +2207,30 @@ static void __perf_event_mark_enabled(struct perf_event *event)
|
|
|
/*
|
|
|
* Cross CPU call to enable a performance event
|
|
|
*/
|
|
|
-static int __perf_event_enable(void *info)
|
|
|
+static void __perf_event_enable(struct perf_event *event,
|
|
|
+ struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx,
|
|
|
+ void *info)
|
|
|
{
|
|
|
- struct perf_event *event = info;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
struct perf_event *leader = event->group_leader;
|
|
|
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
- int err;
|
|
|
+ struct perf_event_context *task_ctx;
|
|
|
|
|
|
- /*
|
|
|
- * There's a time window between 'ctx->is_active' check
|
|
|
- * in perf_event_enable function and this place having:
|
|
|
- * - IRQs on
|
|
|
- * - ctx->lock unlocked
|
|
|
- *
|
|
|
- * where the task could be killed and 'ctx' deactivated
|
|
|
- * by perf_event_exit_task.
|
|
|
- */
|
|
|
- if (!ctx->is_active)
|
|
|
- return -EINVAL;
|
|
|
+ if (event->state >= PERF_EVENT_STATE_INACTIVE ||
|
|
|
+ event->state <= PERF_EVENT_STATE_ERROR)
|
|
|
+ return;
|
|
|
|
|
|
- raw_spin_lock(&ctx->lock);
|
|
|
update_context_time(ctx);
|
|
|
-
|
|
|
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
|
|
|
- goto unlock;
|
|
|
-
|
|
|
- /*
|
|
|
- * set current task's cgroup time reference point
|
|
|
- */
|
|
|
- perf_cgroup_set_timestamp(current, ctx);
|
|
|
-
|
|
|
__perf_event_mark_enabled(event);
|
|
|
|
|
|
+ if (!ctx->is_active)
|
|
|
+ return;
|
|
|
+
|
|
|
if (!event_filter_match(event)) {
|
|
|
- if (is_cgroup_event(event))
|
|
|
+ if (is_cgroup_event(event)) {
|
|
|
+ perf_cgroup_set_timestamp(current, ctx); // XXX ?
|
|
|
perf_cgroup_defer_enabled(event);
|
|
|
- goto unlock;
|
|
|
+ }
|
|
|
+ return;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2224,41 +2238,13 @@ static int __perf_event_enable(void *info)
|
|
|
* then don't put it on unless the group is on.
|
|
|
*/
|
|
|
if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
|
|
|
- goto unlock;
|
|
|
-
|
|
|
- if (!group_can_go_on(event, cpuctx, 1)) {
|
|
|
- err = -EEXIST;
|
|
|
- } else {
|
|
|
- if (event == leader)
|
|
|
- err = group_sched_in(event, cpuctx, ctx);
|
|
|
- else
|
|
|
- err = event_sched_in(event, cpuctx, ctx);
|
|
|
- }
|
|
|
-
|
|
|
- if (err) {
|
|
|
- /*
|
|
|
- * If this event can't go on and it's part of a
|
|
|
- * group, then the whole group has to come off.
|
|
|
- */
|
|
|
- if (leader != event) {
|
|
|
- group_sched_out(leader, cpuctx, ctx);
|
|
|
- perf_mux_hrtimer_restart(cpuctx);
|
|
|
- }
|
|
|
- if (leader->attr.pinned) {
|
|
|
- update_group_times(leader);
|
|
|
- leader->state = PERF_EVENT_STATE_ERROR;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
-unlock:
|
|
|
- raw_spin_unlock(&ctx->lock);
|
|
|
+ return;
|
|
|
|
|
|
- return 0;
|
|
|
-}
|
|
|
+ task_ctx = cpuctx->task_ctx;
|
|
|
+ if (ctx->task)
|
|
|
+ WARN_ON_ONCE(task_ctx != ctx);
|
|
|
|
|
|
-void ___perf_event_enable(void *info)
|
|
|
-{
|
|
|
- __perf_event_mark_enabled((struct perf_event *)info);
|
|
|
+ ctx_resched(cpuctx, task_ctx);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2275,7 +2261,8 @@ static void _perf_event_enable(struct perf_event *event)
|
|
|
struct perf_event_context *ctx = event->ctx;
|
|
|
|
|
|
raw_spin_lock_irq(&ctx->lock);
|
|
|
- if (event->state >= PERF_EVENT_STATE_INACTIVE) {
|
|
|
+ if (event->state >= PERF_EVENT_STATE_INACTIVE ||
|
|
|
+ event->state < PERF_EVENT_STATE_ERROR) {
|
|
|
raw_spin_unlock_irq(&ctx->lock);
|
|
|
return;
|
|
|
}
|
|
@@ -2291,8 +2278,7 @@ static void _perf_event_enable(struct perf_event *event)
|
|
|
event->state = PERF_EVENT_STATE_OFF;
|
|
|
raw_spin_unlock_irq(&ctx->lock);
|
|
|
|
|
|
- event_function_call(event, __perf_event_enable,
|
|
|
- ___perf_event_enable, event);
|
|
|
+ event_function_call(event, __perf_event_enable, NULL);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2342,12 +2328,27 @@ static void ctx_sched_out(struct perf_event_context *ctx,
|
|
|
struct perf_cpu_context *cpuctx,
|
|
|
enum event_type_t event_type)
|
|
|
{
|
|
|
- struct perf_event *event;
|
|
|
int is_active = ctx->is_active;
|
|
|
+ struct perf_event *event;
|
|
|
|
|
|
- ctx->is_active &= ~event_type;
|
|
|
- if (likely(!ctx->nr_events))
|
|
|
+ lockdep_assert_held(&ctx->lock);
|
|
|
+
|
|
|
+ if (likely(!ctx->nr_events)) {
|
|
|
+ /*
|
|
|
+ * See __perf_remove_from_context().
|
|
|
+ */
|
|
|
+ WARN_ON_ONCE(ctx->is_active);
|
|
|
+ if (ctx->task)
|
|
|
+ WARN_ON_ONCE(cpuctx->task_ctx);
|
|
|
return;
|
|
|
+ }
|
|
|
+
|
|
|
+ ctx->is_active &= ~event_type;
|
|
|
+ if (ctx->task) {
|
|
|
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
|
|
|
+ if (!ctx->is_active)
|
|
|
+ cpuctx->task_ctx = NULL;
|
|
|
+ }
|
|
|
|
|
|
update_context_time(ctx);
|
|
|
update_cgrp_time_from_cpuctx(cpuctx);
|
|
@@ -2518,17 +2519,21 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
|
|
raw_spin_lock(&ctx->lock);
|
|
|
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
|
|
|
if (context_equiv(ctx, next_ctx)) {
|
|
|
- /*
|
|
|
- * XXX do we need a memory barrier of sorts
|
|
|
- * wrt to rcu_dereference() of perf_event_ctxp
|
|
|
- */
|
|
|
- task->perf_event_ctxp[ctxn] = next_ctx;
|
|
|
- next->perf_event_ctxp[ctxn] = ctx;
|
|
|
- ctx->task = next;
|
|
|
- next_ctx->task = task;
|
|
|
+ WRITE_ONCE(ctx->task, next);
|
|
|
+ WRITE_ONCE(next_ctx->task, task);
|
|
|
|
|
|
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
|
|
|
|
|
|
+ /*
|
|
|
+ * RCU_INIT_POINTER here is safe because we've not
|
|
|
+ * modified the ctx and the above modification of
|
|
|
+ * ctx->task and ctx->task_ctx_data are immaterial
|
|
|
+ * since those values are always verified under
|
|
|
+ * ctx->lock which we're now holding.
|
|
|
+ */
|
|
|
+ RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], next_ctx);
|
|
|
+ RCU_INIT_POINTER(next->perf_event_ctxp[ctxn], ctx);
|
|
|
+
|
|
|
do_switch = 0;
|
|
|
|
|
|
perf_event_sync_stat(ctx, next_ctx);
|
|
@@ -2541,8 +2546,7 @@ unlock:
|
|
|
|
|
|
if (do_switch) {
|
|
|
raw_spin_lock(&ctx->lock);
|
|
|
- ctx_sched_out(ctx, cpuctx, EVENT_ALL);
|
|
|
- cpuctx->task_ctx = NULL;
|
|
|
+ task_ctx_sched_out(cpuctx, ctx);
|
|
|
raw_spin_unlock(&ctx->lock);
|
|
|
}
|
|
|
}
|
|
@@ -2637,10 +2641,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
|
|
|
perf_cgroup_sched_out(task, next);
|
|
|
}
|
|
|
|
|
|
-static void task_ctx_sched_out(struct perf_event_context *ctx)
|
|
|
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx)
|
|
|
{
|
|
|
- struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
-
|
|
|
if (!cpuctx->task_ctx)
|
|
|
return;
|
|
|
|
|
@@ -2648,7 +2651,6 @@ static void task_ctx_sched_out(struct perf_event_context *ctx)
|
|
|
return;
|
|
|
|
|
|
ctx_sched_out(ctx, cpuctx, EVENT_ALL);
|
|
|
- cpuctx->task_ctx = NULL;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2725,13 +2727,22 @@ ctx_sched_in(struct perf_event_context *ctx,
|
|
|
enum event_type_t event_type,
|
|
|
struct task_struct *task)
|
|
|
{
|
|
|
- u64 now;
|
|
|
int is_active = ctx->is_active;
|
|
|
+ u64 now;
|
|
|
+
|
|
|
+ lockdep_assert_held(&ctx->lock);
|
|
|
|
|
|
- ctx->is_active |= event_type;
|
|
|
if (likely(!ctx->nr_events))
|
|
|
return;
|
|
|
|
|
|
+ ctx->is_active |= event_type;
|
|
|
+ if (ctx->task) {
|
|
|
+ if (!is_active)
|
|
|
+ cpuctx->task_ctx = ctx;
|
|
|
+ else
|
|
|
+ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
|
|
|
+ }
|
|
|
+
|
|
|
now = perf_clock();
|
|
|
ctx->timestamp = now;
|
|
|
perf_cgroup_set_timestamp(task, ctx);
|
|
@@ -2773,12 +2784,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
|
|
|
* cpu flexible, task flexible.
|
|
|
*/
|
|
|
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
|
|
|
-
|
|
|
- if (ctx->nr_events)
|
|
|
- cpuctx->task_ctx = ctx;
|
|
|
-
|
|
|
- perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
|
|
|
-
|
|
|
+ perf_event_sched_in(cpuctx, ctx, task);
|
|
|
perf_pmu_enable(ctx->pmu);
|
|
|
perf_ctx_unlock(cpuctx, ctx);
|
|
|
}
|
|
@@ -2800,6 +2806,16 @@ void __perf_event_task_sched_in(struct task_struct *prev,
|
|
|
struct perf_event_context *ctx;
|
|
|
int ctxn;
|
|
|
|
|
|
+ /*
|
|
|
+ * If cgroup events exist on this CPU, then we need to check if we have
|
|
|
+ * to switch in PMU state; cgroup events are system-wide mode only.
|
|
|
+ *
|
|
|
+ * Since cgroup events are CPU events, we must schedule these in before
|
|
|
+ * we schedule in the task events.
|
|
|
+ */
|
|
|
+ if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
|
|
|
+ perf_cgroup_sched_in(prev, task);
|
|
|
+
|
|
|
for_each_task_context_nr(ctxn) {
|
|
|
ctx = task->perf_event_ctxp[ctxn];
|
|
|
if (likely(!ctx))
|
|
@@ -2807,13 +2823,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
|
|
|
|
|
|
perf_event_context_sched_in(ctx, task);
|
|
|
}
|
|
|
- /*
|
|
|
- * if cgroup events exist on this CPU, then we need
|
|
|
- * to check if we have to switch in PMU state.
|
|
|
- * cgroup event are system-wide mode only
|
|
|
- */
|
|
|
- if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
|
|
|
- perf_cgroup_sched_in(prev, task);
|
|
|
|
|
|
if (atomic_read(&nr_switch_events))
|
|
|
perf_event_switch(task, prev, true);
|
|
@@ -3099,46 +3108,30 @@ static int event_enable_on_exec(struct perf_event *event,
|
|
|
static void perf_event_enable_on_exec(int ctxn)
|
|
|
{
|
|
|
struct perf_event_context *ctx, *clone_ctx = NULL;
|
|
|
+ struct perf_cpu_context *cpuctx;
|
|
|
struct perf_event *event;
|
|
|
unsigned long flags;
|
|
|
int enabled = 0;
|
|
|
- int ret;
|
|
|
|
|
|
local_irq_save(flags);
|
|
|
ctx = current->perf_event_ctxp[ctxn];
|
|
|
if (!ctx || !ctx->nr_events)
|
|
|
goto out;
|
|
|
|
|
|
- /*
|
|
|
- * We must ctxsw out cgroup events to avoid conflict
|
|
|
- * when invoking perf_task_event_sched_in() later on
|
|
|
- * in this function. Otherwise we end up trying to
|
|
|
- * ctxswin cgroup events which are already scheduled
|
|
|
- * in.
|
|
|
- */
|
|
|
- perf_cgroup_sched_out(current, NULL);
|
|
|
-
|
|
|
- raw_spin_lock(&ctx->lock);
|
|
|
- task_ctx_sched_out(ctx);
|
|
|
-
|
|
|
- list_for_each_entry(event, &ctx->event_list, event_entry) {
|
|
|
- ret = event_enable_on_exec(event, ctx);
|
|
|
- if (ret)
|
|
|
- enabled = 1;
|
|
|
- }
|
|
|
+ cpuctx = __get_cpu_context(ctx);
|
|
|
+ perf_ctx_lock(cpuctx, ctx);
|
|
|
+ list_for_each_entry(event, &ctx->event_list, event_entry)
|
|
|
+ enabled |= event_enable_on_exec(event, ctx);
|
|
|
|
|
|
/*
|
|
|
- * Unclone this context if we enabled any event.
|
|
|
+ * Unclone and reschedule this context if we enabled any event.
|
|
|
*/
|
|
|
- if (enabled)
|
|
|
+ if (enabled) {
|
|
|
clone_ctx = unclone_ctx(ctx);
|
|
|
+ ctx_resched(cpuctx, ctx);
|
|
|
+ }
|
|
|
+ perf_ctx_unlock(cpuctx, ctx);
|
|
|
|
|
|
- raw_spin_unlock(&ctx->lock);
|
|
|
-
|
|
|
- /*
|
|
|
- * Also calls ctxswin for cgroup events, if any:
|
|
|
- */
|
|
|
- perf_event_context_sched_in(ctx, ctx->task);
|
|
|
out:
|
|
|
local_irq_restore(flags);
|
|
|
|
|
@@ -3334,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
|
|
|
INIT_LIST_HEAD(&ctx->flexible_groups);
|
|
|
INIT_LIST_HEAD(&ctx->event_list);
|
|
|
atomic_set(&ctx->refcount, 1);
|
|
|
- INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
|
|
|
}
|
|
|
|
|
|
static struct perf_event_context *
|
|
@@ -3521,11 +3513,13 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
|
|
|
|
|
|
static void unaccount_event(struct perf_event *event)
|
|
|
{
|
|
|
+ bool dec = false;
|
|
|
+
|
|
|
if (event->parent)
|
|
|
return;
|
|
|
|
|
|
if (event->attach_state & PERF_ATTACH_TASK)
|
|
|
- static_key_slow_dec_deferred(&perf_sched_events);
|
|
|
+ dec = true;
|
|
|
if (event->attr.mmap || event->attr.mmap_data)
|
|
|
atomic_dec(&nr_mmap_events);
|
|
|
if (event->attr.comm)
|
|
@@ -3535,12 +3529,15 @@ static void unaccount_event(struct perf_event *event)
|
|
|
if (event->attr.freq)
|
|
|
atomic_dec(&nr_freq_events);
|
|
|
if (event->attr.context_switch) {
|
|
|
- static_key_slow_dec_deferred(&perf_sched_events);
|
|
|
+ dec = true;
|
|
|
atomic_dec(&nr_switch_events);
|
|
|
}
|
|
|
if (is_cgroup_event(event))
|
|
|
- static_key_slow_dec_deferred(&perf_sched_events);
|
|
|
+ dec = true;
|
|
|
if (has_branch_stack(event))
|
|
|
+ dec = true;
|
|
|
+
|
|
|
+ if (dec)
|
|
|
static_key_slow_dec_deferred(&perf_sched_events);
|
|
|
|
|
|
unaccount_event_cpu(event, event->cpu);
|
|
@@ -3556,7 +3553,7 @@ static void unaccount_event(struct perf_event *event)
|
|
|
* 3) two matching events on the same context.
|
|
|
*
|
|
|
* The former two cases are handled in the allocation path (perf_event_alloc(),
|
|
|
- * __free_event()), the latter -- before the first perf_install_in_context().
|
|
|
+ * _free_event()), the latter -- before the first perf_install_in_context().
|
|
|
*/
|
|
|
static int exclusive_event_init(struct perf_event *event)
|
|
|
{
|
|
@@ -3631,29 +3628,6 @@ static bool exclusive_event_installable(struct perf_event *event,
|
|
|
return true;
|
|
|
}
|
|
|
|
|
|
-static void __free_event(struct perf_event *event)
|
|
|
-{
|
|
|
- if (!event->parent) {
|
|
|
- if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
|
|
- put_callchain_buffers();
|
|
|
- }
|
|
|
-
|
|
|
- perf_event_free_bpf_prog(event);
|
|
|
-
|
|
|
- if (event->destroy)
|
|
|
- event->destroy(event);
|
|
|
-
|
|
|
- if (event->ctx)
|
|
|
- put_ctx(event->ctx);
|
|
|
-
|
|
|
- if (event->pmu) {
|
|
|
- exclusive_event_destroy(event);
|
|
|
- module_put(event->pmu->module);
|
|
|
- }
|
|
|
-
|
|
|
- call_rcu(&event->rcu_head, free_event_rcu);
|
|
|
-}
|
|
|
-
|
|
|
static void _free_event(struct perf_event *event)
|
|
|
{
|
|
|
irq_work_sync(&event->pending);
|
|
@@ -3675,7 +3649,25 @@ static void _free_event(struct perf_event *event)
|
|
|
if (is_cgroup_event(event))
|
|
|
perf_detach_cgroup(event);
|
|
|
|
|
|
- __free_event(event);
|
|
|
+ if (!event->parent) {
|
|
|
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
|
|
+ put_callchain_buffers();
|
|
|
+ }
|
|
|
+
|
|
|
+ perf_event_free_bpf_prog(event);
|
|
|
+
|
|
|
+ if (event->destroy)
|
|
|
+ event->destroy(event);
|
|
|
+
|
|
|
+ if (event->ctx)
|
|
|
+ put_ctx(event->ctx);
|
|
|
+
|
|
|
+ if (event->pmu) {
|
|
|
+ exclusive_event_destroy(event);
|
|
|
+ module_put(event->pmu->module);
|
|
|
+ }
|
|
|
+
|
|
|
+ call_rcu(&event->rcu_head, free_event_rcu);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -3702,14 +3694,13 @@ static void perf_remove_from_owner(struct perf_event *event)
|
|
|
struct task_struct *owner;
|
|
|
|
|
|
rcu_read_lock();
|
|
|
- owner = ACCESS_ONCE(event->owner);
|
|
|
/*
|
|
|
- * Matches the smp_wmb() in perf_event_exit_task(). If we observe
|
|
|
- * !owner it means the list deletion is complete and we can indeed
|
|
|
- * free this event, otherwise we need to serialize on
|
|
|
+ * Matches the smp_store_release() in perf_event_exit_task(). If we
|
|
|
+ * observe !owner it means the list deletion is complete and we can
|
|
|
+ * indeed free this event, otherwise we need to serialize on
|
|
|
* owner->perf_event_mutex.
|
|
|
*/
|
|
|
- smp_read_barrier_depends();
|
|
|
+ owner = lockless_dereference(event->owner);
|
|
|
if (owner) {
|
|
|
/*
|
|
|
* Since delayed_put_task_struct() also drops the last
|
|
@@ -3737,8 +3728,10 @@ static void perf_remove_from_owner(struct perf_event *event)
|
|
|
* ensured they're done, and we can proceed with freeing the
|
|
|
* event.
|
|
|
*/
|
|
|
- if (event->owner)
|
|
|
+ if (event->owner) {
|
|
|
list_del_init(&event->owner_entry);
|
|
|
+ smp_store_release(&event->owner, NULL);
|
|
|
+ }
|
|
|
mutex_unlock(&owner->perf_event_mutex);
|
|
|
put_task_struct(owner);
|
|
|
}
|
|
@@ -3746,36 +3739,98 @@ static void perf_remove_from_owner(struct perf_event *event)
|
|
|
|
|
|
static void put_event(struct perf_event *event)
|
|
|
{
|
|
|
- struct perf_event_context *ctx;
|
|
|
-
|
|
|
if (!atomic_long_dec_and_test(&event->refcount))
|
|
|
return;
|
|
|
|
|
|
+ _free_event(event);
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Kill an event dead; while event::refcount will preserve the event
|
|
|
+ * object, it will not preserve its functionality. Once the last 'user'
|
|
|
+ * gives up the object, we'll destroy the thing.
|
|
|
+ */
|
|
|
+int perf_event_release_kernel(struct perf_event *event)
|
|
|
+{
|
|
|
+ struct perf_event_context *ctx;
|
|
|
+ struct perf_event *child, *tmp;
|
|
|
+
|
|
|
if (!is_kernel_event(event))
|
|
|
perf_remove_from_owner(event);
|
|
|
|
|
|
+ ctx = perf_event_ctx_lock(event);
|
|
|
+ WARN_ON_ONCE(ctx->parent_ctx);
|
|
|
+ perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
|
|
|
+ perf_event_ctx_unlock(event, ctx);
|
|
|
+
|
|
|
/*
|
|
|
- * There are two ways this annotation is useful:
|
|
|
+ * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
|
|
|
+ * either from the above perf_remove_from_context() or through
|
|
|
+ * perf_event_exit_event().
|
|
|
*
|
|
|
- * 1) there is a lock recursion from perf_event_exit_task
|
|
|
- * see the comment there.
|
|
|
+ * Therefore, anybody acquiring event->child_mutex after the below
|
|
|
+ * loop _must_ also see this, most importantly inherit_event() which
|
|
|
+ * will avoid placing more children on the list.
|
|
|
*
|
|
|
- * 2) there is a lock-inversion with mmap_sem through
|
|
|
- * perf_read_group(), which takes faults while
|
|
|
- * holding ctx->mutex, however this is called after
|
|
|
- * the last filedesc died, so there is no possibility
|
|
|
- * to trigger the AB-BA case.
|
|
|
+ * Thus this guarantees that we will in fact observe and kill _ALL_
|
|
|
+ * child events.
|
|
|
*/
|
|
|
- ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
|
|
|
- WARN_ON_ONCE(ctx->parent_ctx);
|
|
|
- perf_remove_from_context(event, true);
|
|
|
- perf_event_ctx_unlock(event, ctx);
|
|
|
+ WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
|
|
|
|
|
|
- _free_event(event);
|
|
|
-}
|
|
|
+again:
|
|
|
+ mutex_lock(&event->child_mutex);
|
|
|
+ list_for_each_entry(child, &event->child_list, child_list) {
|
|
|
|
|
|
-int perf_event_release_kernel(struct perf_event *event)
|
|
|
-{
|
|
|
+ /*
|
|
|
+ * Cannot change, child events are not migrated, see the
|
|
|
+ * comment with perf_event_ctx_lock_nested().
|
|
|
+ */
|
|
|
+ ctx = lockless_dereference(child->ctx);
|
|
|
+ /*
|
|
|
+ * Since child_mutex nests inside ctx::mutex, we must jump
|
|
|
+ * through hoops. We start by grabbing a reference on the ctx.
|
|
|
+ *
|
|
|
+ * Since the event cannot get freed while we hold the
|
|
|
+ * child_mutex, the context must also exist and have a !0
|
|
|
+ * reference count.
|
|
|
+ */
|
|
|
+ get_ctx(ctx);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Now that we have a ctx ref, we can drop child_mutex, and
|
|
|
+ * acquire ctx::mutex without fear of it going away. Then we
|
|
|
+ * can re-acquire child_mutex.
|
|
|
+ */
|
|
|
+ mutex_unlock(&event->child_mutex);
|
|
|
+ mutex_lock(&ctx->mutex);
|
|
|
+ mutex_lock(&event->child_mutex);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Now that we hold ctx::mutex and child_mutex, revalidate our
|
|
|
+ * state, if child is still the first entry, it didn't get freed
|
|
|
+ * and we can continue doing so.
|
|
|
+ */
|
|
|
+ tmp = list_first_entry_or_null(&event->child_list,
|
|
|
+ struct perf_event, child_list);
|
|
|
+ if (tmp == child) {
|
|
|
+ perf_remove_from_context(child, DETACH_GROUP);
|
|
|
+ list_del(&child->child_list);
|
|
|
+ free_event(child);
|
|
|
+ /*
|
|
|
+ * This matches the refcount bump in inherit_event();
|
|
|
+ * this can't be the last reference.
|
|
|
+ */
|
|
|
+ put_event(event);
|
|
|
+ }
|
|
|
+
|
|
|
+ mutex_unlock(&event->child_mutex);
|
|
|
+ mutex_unlock(&ctx->mutex);
|
|
|
+ put_ctx(ctx);
|
|
|
+ goto again;
|
|
|
+ }
|
|
|
+ mutex_unlock(&event->child_mutex);
|
|
|
+
|
|
|
+ /* Must be the last reference */
|
|
|
put_event(event);
|
|
|
return 0;
|
|
|
}
|
|
@@ -3786,46 +3841,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
|
|
|
*/
|
|
|
static int perf_release(struct inode *inode, struct file *file)
|
|
|
{
|
|
|
- put_event(file->private_data);
|
|
|
+ perf_event_release_kernel(file->private_data);
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-/*
|
|
|
- * Remove all orphanes events from the context.
|
|
|
- */
|
|
|
-static void orphans_remove_work(struct work_struct *work)
|
|
|
-{
|
|
|
- struct perf_event_context *ctx;
|
|
|
- struct perf_event *event, *tmp;
|
|
|
-
|
|
|
- ctx = container_of(work, struct perf_event_context,
|
|
|
- orphans_remove.work);
|
|
|
-
|
|
|
- mutex_lock(&ctx->mutex);
|
|
|
- list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
|
|
|
- struct perf_event *parent_event = event->parent;
|
|
|
-
|
|
|
- if (!is_orphaned_child(event))
|
|
|
- continue;
|
|
|
-
|
|
|
- perf_remove_from_context(event, true);
|
|
|
-
|
|
|
- mutex_lock(&parent_event->child_mutex);
|
|
|
- list_del_init(&event->child_list);
|
|
|
- mutex_unlock(&parent_event->child_mutex);
|
|
|
-
|
|
|
- free_event(event);
|
|
|
- put_event(parent_event);
|
|
|
- }
|
|
|
-
|
|
|
- raw_spin_lock_irq(&ctx->lock);
|
|
|
- ctx->orphans_remove_sched = false;
|
|
|
- raw_spin_unlock_irq(&ctx->lock);
|
|
|
- mutex_unlock(&ctx->mutex);
|
|
|
-
|
|
|
- put_ctx(ctx);
|
|
|
-}
|
|
|
-
|
|
|
u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
|
|
|
{
|
|
|
struct perf_event *child;
|
|
@@ -4054,7 +4073,7 @@ static void _perf_event_reset(struct perf_event *event)
|
|
|
/*
|
|
|
* Holding the top-level event's child_mutex means that any
|
|
|
* descendant process that has inherited this event will block
|
|
|
- * in sync_child_event if it goes to exit, thus satisfying the
|
|
|
+ * in perf_event_exit_event() if it goes to exit, thus satisfying the
|
|
|
* task existence requirements of perf_event_enable/disable.
|
|
|
*/
|
|
|
static void perf_event_for_each_child(struct perf_event *event,
|
|
@@ -4086,36 +4105,14 @@ static void perf_event_for_each(struct perf_event *event,
|
|
|
perf_event_for_each_child(sibling, func);
|
|
|
}
|
|
|
|
|
|
-struct period_event {
|
|
|
- struct perf_event *event;
|
|
|
- u64 value;
|
|
|
-};
|
|
|
-
|
|
|
-static void ___perf_event_period(void *info)
|
|
|
-{
|
|
|
- struct period_event *pe = info;
|
|
|
- struct perf_event *event = pe->event;
|
|
|
- u64 value = pe->value;
|
|
|
-
|
|
|
- if (event->attr.freq) {
|
|
|
- event->attr.sample_freq = value;
|
|
|
- } else {
|
|
|
- event->attr.sample_period = value;
|
|
|
- event->hw.sample_period = value;
|
|
|
- }
|
|
|
-
|
|
|
- local64_set(&event->hw.period_left, 0);
|
|
|
-}
|
|
|
-
|
|
|
-static int __perf_event_period(void *info)
|
|
|
+static void __perf_event_period(struct perf_event *event,
|
|
|
+ struct perf_cpu_context *cpuctx,
|
|
|
+ struct perf_event_context *ctx,
|
|
|
+ void *info)
|
|
|
{
|
|
|
- struct period_event *pe = info;
|
|
|
- struct perf_event *event = pe->event;
|
|
|
- struct perf_event_context *ctx = event->ctx;
|
|
|
- u64 value = pe->value;
|
|
|
+ u64 value = *((u64 *)info);
|
|
|
bool active;
|
|
|
|
|
|
- raw_spin_lock(&ctx->lock);
|
|
|
if (event->attr.freq) {
|
|
|
event->attr.sample_freq = value;
|
|
|
} else {
|
|
@@ -4135,14 +4132,10 @@ static int __perf_event_period(void *info)
|
|
|
event->pmu->start(event, PERF_EF_RELOAD);
|
|
|
perf_pmu_enable(ctx->pmu);
|
|
|
}
|
|
|
- raw_spin_unlock(&ctx->lock);
|
|
|
-
|
|
|
- return 0;
|
|
|
}
|
|
|
|
|
|
static int perf_event_period(struct perf_event *event, u64 __user *arg)
|
|
|
{
|
|
|
- struct period_event pe = { .event = event, };
|
|
|
u64 value;
|
|
|
|
|
|
if (!is_sampling_event(event))
|
|
@@ -4157,10 +4150,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
|
|
|
if (event->attr.freq && value > sysctl_perf_event_sample_rate)
|
|
|
return -EINVAL;
|
|
|
|
|
|
- pe.value = value;
|
|
|
-
|
|
|
- event_function_call(event, __perf_event_period,
|
|
|
- ___perf_event_period, &pe);
|
|
|
+ event_function_call(event, __perf_event_period, &value);
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -4932,7 +4922,7 @@ static void perf_pending_event(struct irq_work *entry)
|
|
|
|
|
|
if (event->pending_disable) {
|
|
|
event->pending_disable = 0;
|
|
|
- __perf_event_disable(event);
|
|
|
+ perf_event_disable_local(event);
|
|
|
}
|
|
|
|
|
|
if (event->pending_wakeup) {
|
|
@@ -7753,11 +7743,13 @@ static void account_event_cpu(struct perf_event *event, int cpu)
|
|
|
|
|
|
static void account_event(struct perf_event *event)
|
|
|
{
|
|
|
+ bool inc = false;
|
|
|
+
|
|
|
if (event->parent)
|
|
|
return;
|
|
|
|
|
|
if (event->attach_state & PERF_ATTACH_TASK)
|
|
|
- static_key_slow_inc(&perf_sched_events.key);
|
|
|
+ inc = true;
|
|
|
if (event->attr.mmap || event->attr.mmap_data)
|
|
|
atomic_inc(&nr_mmap_events);
|
|
|
if (event->attr.comm)
|
|
@@ -7770,11 +7762,14 @@ static void account_event(struct perf_event *event)
|
|
|
}
|
|
|
if (event->attr.context_switch) {
|
|
|
atomic_inc(&nr_switch_events);
|
|
|
- static_key_slow_inc(&perf_sched_events.key);
|
|
|
+ inc = true;
|
|
|
}
|
|
|
if (has_branch_stack(event))
|
|
|
- static_key_slow_inc(&perf_sched_events.key);
|
|
|
+ inc = true;
|
|
|
if (is_cgroup_event(event))
|
|
|
+ inc = true;
|
|
|
+
|
|
|
+ if (inc)
|
|
|
static_key_slow_inc(&perf_sched_events.key);
|
|
|
|
|
|
account_event_cpu(event, event->cpu);
|
|
@@ -8422,11 +8417,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
|
|
* See perf_event_ctx_lock() for comments on the details
|
|
|
* of swizzling perf_event::ctx.
|
|
|
*/
|
|
|
- perf_remove_from_context(group_leader, false);
|
|
|
+ perf_remove_from_context(group_leader, 0);
|
|
|
|
|
|
list_for_each_entry(sibling, &group_leader->sibling_list,
|
|
|
group_entry) {
|
|
|
- perf_remove_from_context(sibling, false);
|
|
|
+ perf_remove_from_context(sibling, 0);
|
|
|
put_ctx(gctx);
|
|
|
}
|
|
|
|
|
@@ -8479,6 +8474,8 @@ SYSCALL_DEFINE5(perf_event_open,
|
|
|
perf_event__header_size(event);
|
|
|
perf_event__id_header_size(event);
|
|
|
|
|
|
+ event->owner = current;
|
|
|
+
|
|
|
perf_install_in_context(ctx, event, event->cpu);
|
|
|
perf_unpin_context(ctx);
|
|
|
|
|
@@ -8488,8 +8485,6 @@ SYSCALL_DEFINE5(perf_event_open,
|
|
|
|
|
|
put_online_cpus();
|
|
|
|
|
|
- event->owner = current;
|
|
|
-
|
|
|
mutex_lock(¤t->perf_event_mutex);
|
|
|
list_add_tail(&event->owner_entry, ¤t->perf_event_list);
|
|
|
mutex_unlock(¤t->perf_event_mutex);
|
|
@@ -8556,7 +8551,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
|
|
|
}
|
|
|
|
|
|
/* Mark owner so we could distinguish it from user events. */
|
|
|
- event->owner = EVENT_OWNER_KERNEL;
|
|
|
+ event->owner = TASK_TOMBSTONE;
|
|
|
|
|
|
account_event(event);
|
|
|
|
|
@@ -8606,7 +8601,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
|
|
|
mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
|
|
|
list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
|
|
|
event_entry) {
|
|
|
- perf_remove_from_context(event, false);
|
|
|
+ perf_remove_from_context(event, 0);
|
|
|
unaccount_event_cpu(event, src_cpu);
|
|
|
put_ctx(src_ctx);
|
|
|
list_add(&event->migrate_entry, &events);
|
|
@@ -8673,33 +8668,15 @@ static void sync_child_event(struct perf_event *child_event,
|
|
|
&parent_event->child_total_time_enabled);
|
|
|
atomic64_add(child_event->total_time_running,
|
|
|
&parent_event->child_total_time_running);
|
|
|
-
|
|
|
- /*
|
|
|
- * Remove this event from the parent's list
|
|
|
- */
|
|
|
- WARN_ON_ONCE(parent_event->ctx->parent_ctx);
|
|
|
- mutex_lock(&parent_event->child_mutex);
|
|
|
- list_del_init(&child_event->child_list);
|
|
|
- mutex_unlock(&parent_event->child_mutex);
|
|
|
-
|
|
|
- /*
|
|
|
- * Make sure user/parent get notified, that we just
|
|
|
- * lost one event.
|
|
|
- */
|
|
|
- perf_event_wakeup(parent_event);
|
|
|
-
|
|
|
- /*
|
|
|
- * Release the parent event, if this was the last
|
|
|
- * reference to it.
|
|
|
- */
|
|
|
- put_event(parent_event);
|
|
|
}
|
|
|
|
|
|
static void
|
|
|
-__perf_event_exit_task(struct perf_event *child_event,
|
|
|
- struct perf_event_context *child_ctx,
|
|
|
- struct task_struct *child)
|
|
|
+perf_event_exit_event(struct perf_event *child_event,
|
|
|
+ struct perf_event_context *child_ctx,
|
|
|
+ struct task_struct *child)
|
|
|
{
|
|
|
+ struct perf_event *parent_event = child_event->parent;
|
|
|
+
|
|
|
/*
|
|
|
* Do not destroy the 'original' grouping; because of the context
|
|
|
* switch optimization the original events could've ended up in a
|
|
@@ -8712,57 +8689,86 @@ __perf_event_exit_task(struct perf_event *child_event,
|
|
|
* Do destroy all inherited groups, we don't care about those
|
|
|
* and being thorough is better.
|
|
|
*/
|
|
|
- perf_remove_from_context(child_event, !!child_event->parent);
|
|
|
+ raw_spin_lock_irq(&child_ctx->lock);
|
|
|
+ WARN_ON_ONCE(child_ctx->is_active);
|
|
|
+
|
|
|
+ if (parent_event)
|
|
|
+ perf_group_detach(child_event);
|
|
|
+ list_del_event(child_event, child_ctx);
|
|
|
+ child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
|
|
|
+ raw_spin_unlock_irq(&child_ctx->lock);
|
|
|
|
|
|
/*
|
|
|
- * It can happen that the parent exits first, and has events
|
|
|
- * that are still around due to the child reference. These
|
|
|
- * events need to be zapped.
|
|
|
+ * Parent events are governed by their filedesc, retain them.
|
|
|
*/
|
|
|
- if (child_event->parent) {
|
|
|
- sync_child_event(child_event, child);
|
|
|
- free_event(child_event);
|
|
|
- } else {
|
|
|
- child_event->state = PERF_EVENT_STATE_EXIT;
|
|
|
+ if (!parent_event) {
|
|
|
perf_event_wakeup(child_event);
|
|
|
+ return;
|
|
|
}
|
|
|
+ /*
|
|
|
+ * Child events can be cleaned up.
|
|
|
+ */
|
|
|
+
|
|
|
+ sync_child_event(child_event, child);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Remove this event from the parent's list
|
|
|
+ */
|
|
|
+ WARN_ON_ONCE(parent_event->ctx->parent_ctx);
|
|
|
+ mutex_lock(&parent_event->child_mutex);
|
|
|
+ list_del_init(&child_event->child_list);
|
|
|
+ mutex_unlock(&parent_event->child_mutex);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Kick perf_poll() for is_event_hup().
|
|
|
+ */
|
|
|
+ perf_event_wakeup(parent_event);
|
|
|
+ free_event(child_event);
|
|
|
+ put_event(parent_event);
|
|
|
}
|
|
|
|
|
|
static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
|
|
|
{
|
|
|
- struct perf_event *child_event, *next;
|
|
|
struct perf_event_context *child_ctx, *clone_ctx = NULL;
|
|
|
- unsigned long flags;
|
|
|
+ struct perf_event *child_event, *next;
|
|
|
+
|
|
|
+ WARN_ON_ONCE(child != current);
|
|
|
|
|
|
- if (likely(!child->perf_event_ctxp[ctxn]))
|
|
|
+ child_ctx = perf_pin_task_context(child, ctxn);
|
|
|
+ if (!child_ctx)
|
|
|
return;
|
|
|
|
|
|
- local_irq_save(flags);
|
|
|
/*
|
|
|
- * We can't reschedule here because interrupts are disabled,
|
|
|
- * and either child is current or it is a task that can't be
|
|
|
- * scheduled, so we are now safe from rescheduling changing
|
|
|
- * our context.
|
|
|
+ * In order to reduce the amount of trickery in ctx tear-down, we hold
|
|
|
+ * ctx::mutex over the entire thing. This serializes against almost
|
|
|
+ * everything that wants to access the ctx.
|
|
|
+ *
|
|
|
+ * The exception is sys_perf_event_open() /
|
|
|
+ * perf_event_create_kernel_counter() which does find_get_context()
|
|
|
+ * without ctx::mutex (it cannot because of the move_group double mutex
|
|
|
+ * lock thing). See the comments in perf_install_in_context().
|
|
|
*/
|
|
|
- child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
|
|
|
+ mutex_lock(&child_ctx->mutex);
|
|
|
|
|
|
/*
|
|
|
- * Take the context lock here so that if find_get_context is
|
|
|
- * reading child->perf_event_ctxp, we wait until it has
|
|
|
- * incremented the context's refcount before we do put_ctx below.
|
|
|
+ * In a single ctx::lock section, de-schedule the events and detach the
|
|
|
+ * context from the task such that we cannot ever get it scheduled back
|
|
|
+ * in.
|
|
|
*/
|
|
|
- raw_spin_lock(&child_ctx->lock);
|
|
|
- task_ctx_sched_out(child_ctx);
|
|
|
- child->perf_event_ctxp[ctxn] = NULL;
|
|
|
+ raw_spin_lock_irq(&child_ctx->lock);
|
|
|
+ task_ctx_sched_out(__get_cpu_context(child_ctx), child_ctx);
|
|
|
|
|
|
/*
|
|
|
- * If this context is a clone; unclone it so it can't get
|
|
|
- * swapped to another process while we're removing all
|
|
|
- * the events from it.
|
|
|
+ * Now that the context is inactive, destroy the task <-> ctx relation
|
|
|
+ * and mark the context dead.
|
|
|
*/
|
|
|
+ RCU_INIT_POINTER(child->perf_event_ctxp[ctxn], NULL);
|
|
|
+ put_ctx(child_ctx); /* cannot be last */
|
|
|
+ WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
|
|
|
+ put_task_struct(current); /* cannot be last */
|
|
|
+
|
|
|
clone_ctx = unclone_ctx(child_ctx);
|
|
|
- update_context_time(child_ctx);
|
|
|
- raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
|
|
|
+ raw_spin_unlock_irq(&child_ctx->lock);
|
|
|
|
|
|
if (clone_ctx)
|
|
|
put_ctx(clone_ctx);
|
|
@@ -8774,20 +8780,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
|
|
|
*/
|
|
|
perf_event_task(child, child_ctx, 0);
|
|
|
|
|
|
- /*
|
|
|
- * We can recurse on the same lock type through:
|
|
|
- *
|
|
|
- * __perf_event_exit_task()
|
|
|
- * sync_child_event()
|
|
|
- * put_event()
|
|
|
- * mutex_lock(&ctx->mutex)
|
|
|
- *
|
|
|
- * But since its the parent context it won't be the same instance.
|
|
|
- */
|
|
|
- mutex_lock(&child_ctx->mutex);
|
|
|
-
|
|
|
list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
|
|
|
- __perf_event_exit_task(child_event, child_ctx, child);
|
|
|
+ perf_event_exit_event(child_event, child_ctx, child);
|
|
|
|
|
|
mutex_unlock(&child_ctx->mutex);
|
|
|
|
|
@@ -8812,8 +8806,7 @@ void perf_event_exit_task(struct task_struct *child)
|
|
|
* the owner, closes a race against perf_release() where
|
|
|
* we need to serialize on the owner->perf_event_mutex.
|
|
|
*/
|
|
|
- smp_wmb();
|
|
|
- event->owner = NULL;
|
|
|
+ smp_store_release(&event->owner, NULL);
|
|
|
}
|
|
|
mutex_unlock(&child->perf_event_mutex);
|
|
|
|
|
@@ -8896,21 +8889,20 @@ void perf_event_delayed_put(struct task_struct *task)
|
|
|
WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
|
|
|
}
|
|
|
|
|
|
-struct perf_event *perf_event_get(unsigned int fd)
|
|
|
+struct file *perf_event_get(unsigned int fd)
|
|
|
{
|
|
|
- int err;
|
|
|
- struct fd f;
|
|
|
- struct perf_event *event;
|
|
|
+ struct file *file;
|
|
|
|
|
|
- err = perf_fget_light(fd, &f);
|
|
|
- if (err)
|
|
|
- return ERR_PTR(err);
|
|
|
+ file = fget_raw(fd);
|
|
|
+ if (!file)
|
|
|
+ return ERR_PTR(-EBADF);
|
|
|
|
|
|
- event = f.file->private_data;
|
|
|
- atomic_long_inc(&event->refcount);
|
|
|
- fdput(f);
|
|
|
+ if (file->f_op != &perf_fops) {
|
|
|
+ fput(file);
|
|
|
+ return ERR_PTR(-EBADF);
|
|
|
+ }
|
|
|
|
|
|
- return event;
|
|
|
+ return file;
|
|
|
}
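Since perf_event_get() now returns the struct file rather than a bare event, callers pin the event through the file reference and read it from file->private_data; a hypothetical caller sketch (error handling trimmed to the essentials):

        struct file *file = perf_event_get(fd);
        struct perf_event *event;

        if (IS_ERR(file))
                return PTR_ERR(file);
        event = file->private_data;
        /* use the event; the file reference keeps it alive */
        fput(file);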
|
|
|
|
|
|
const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
|
|
@@ -8953,8 +8945,16 @@ inherit_event(struct perf_event *parent_event,
|
|
|
if (IS_ERR(child_event))
|
|
|
return child_event;
|
|
|
|
|
|
+ /*
|
|
|
+ * is_orphaned_event() and list_add_tail(&parent_event->child_list)
|
|
|
+ * must be under the same lock in order to serialize against
|
|
|
+ * perf_event_release_kernel(), such that either we must observe
|
|
|
+ * is_orphaned_event() or they will observe us on the child_list.
|
|
|
+ */
|
|
|
+ mutex_lock(&parent_event->child_mutex);
|
|
|
if (is_orphaned_event(parent_event) ||
|
|
|
!atomic_long_inc_not_zero(&parent_event->refcount)) {
|
|
|
+ mutex_unlock(&parent_event->child_mutex);
|
|
|
free_event(child_event);
|
|
|
return NULL;
|
|
|
}
|
|
@@ -9002,8 +9002,6 @@ inherit_event(struct perf_event *parent_event,
|
|
|
/*
|
|
|
* Link this into the parent event's child list
|
|
|
*/
|
|
|
- WARN_ON_ONCE(parent_event->ctx->parent_ctx);
|
|
|
- mutex_lock(&parent_event->child_mutex);
|
|
|
list_add_tail(&child_event->child_list, &parent_event->child_list);
|
|
|
mutex_unlock(&parent_event->child_mutex);
|
|
|
|
|
@@ -9221,13 +9219,14 @@ static void perf_event_init_cpu(int cpu)
|
|
|
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
|
|
|
static void __perf_event_exit_context(void *__info)
|
|
|
{
|
|
|
- struct remove_event re = { .detach_group = true };
|
|
|
struct perf_event_context *ctx = __info;
|
|
|
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
|
|
+ struct perf_event *event;
|
|
|
|
|
|
- rcu_read_lock();
|
|
|
- list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
|
|
|
- __perf_remove_from_context(&re);
|
|
|
- rcu_read_unlock();
|
|
|
+ raw_spin_lock(&ctx->lock);
|
|
|
+ list_for_each_entry(event, &ctx->event_list, event_entry)
|
|
|
+ __perf_remove_from_context(event, cpuctx, ctx, (void *)DETACH_GROUP);
|
|
|
+ raw_spin_unlock(&ctx->lock);
|
|
|
}
|
|
|
|
|
|
static void perf_event_exit_cpu_context(int cpu)
|